diff options
Diffstat (limited to 'tools/testing')
308 files changed, 20401 insertions, 2425 deletions
diff --git a/tools/testing/ktest/examples/vmware.conf b/tools/testing/ktest/examples/vmware.conf new file mode 100644 index 000000000000..61958163d242 --- /dev/null +++ b/tools/testing/ktest/examples/vmware.conf @@ -0,0 +1,137 @@ +# +# This config is an example usage of ktest.pl with a vmware guest +# +# VMware Setup: +# ------------- +# - Edit the Virtual Machine ("Edit virtual machine settings") +# - Add a Serial Port +# - You almost certainly want it set "Connect at power on" +# - Select "Use socket (named pipe)" +# - Select a name that you'll recognize, like 'ktestserialpipe' +# - From: Server +# - To: A Virtual Machine +# - Save +# - Make sure you note the name, it will be in the base directory of the +# virtual machine (where the "disks" are stored. The default +# is /var/lib/vmware/<virtual machine name>/<the name you entered above> +# +# - Make note of the path to the VM +# </End VMware setup> +# +# The guest is called 'Guest' and this would be something that +# could be run on the host to test a virtual machine target. + +MACHINE = Guest + +# Name of the serial pipe you set in the VMware settings +VMWARE_SERIAL_NAME = <the name you entered above> + +# Define a variable of the name of the VM +# Noting this needs to be the name of the kmx file, and usually, the +# name of the directory that it's in. If the directory and name +# differ change the VMWARE_VM_DIR accordingly. +# Please ommit the .kmx extension +VMWARE_VM_NAME = <virtual machine name> + +# VM dir name. This is usually the same as the virtual machine's name, +# but not always the case. Change if they differ +VMWARE_VM_DIR = ${VMWARE_VM_NAME} + +# Base directory that the Virtual machine is contained in +# /var/lib/vmware is the default on Linux +VMWARE_VM_BASE_DIR = /var/lib/vmware/${VMWARE_VM_DIR} + +# Use ncat to read the unix pipe. Anything that can read the Unix Pipe +# and output it's contents to stdout will work +CONSOLE = /usr/bin/ncat -U ${VMWARE_VM_BASE_DIR}/${VMWARE_SERIAL_NAME} + +# Define what version of Workstation you are using +# This is used by vmrun to use the appropriate appripriate pieces to +# test this. In all likelihood you want 'ws' or 'player' +# Valid options: +# ws - Workstation (Windows or Linux host) +# fusion - Fusion (Mac host) +# player - Using VMware Player (Windows or Linux host) +# Note: vmrun has to run directly on the host machine +VMWARE_HOST_TYPE = ws + +# VMware provides `vmrun` to allow you to do certain things to the virtual machine +# This should hard reset the VM and force a boot +VMWARE_POWER_CYCLE = /usr/bin/vmrun -T ${VMWARE_HOST_TYPE} reset ${VMWARE_VM_BASE_DIR}/${VMWARE_VM_NAME}.kmx nogui + +#*************************************# +# This part is the same as test.conf # +#*************************************# + +# The include files will set up the type of test to run. Just set TEST to +# which test you want to run. +# +# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config +# +# See the include/*.conf files that define these tests +# +TEST := patchcheck + +# Some tests may have more than one test to run. Define MULTI := 1 to run +# the extra tests. +MULTI := 0 + +# In case you want to differentiate which type of system you are testing +BITS := 64 + +# REBOOT = none, error, fail, empty +# See include/defaults.conf +REBOOT := empty + + +# The defaults file will set up various settings that can be used by all +# machine configs. +INCLUDE include/defaults.conf + + +#*************************************# +# Now we are different from test.conf # +#*************************************# + + +# The example here assumes that Guest is running a Fedora release +# that uses dracut for its initfs. The POST_INSTALL will be executed +# after the install of the kernel and modules are complete. +# +POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION + +# Guests sometimes get stuck on reboot. We wait 3 seconds after running +# the reboot command and then do a full power-cycle of the guest. +# This forces the guest to restart. +# +POWERCYCLE_AFTER_REBOOT = 3 + +# We do the same after the halt command, but this time we wait 20 seconds. +POWEROFF_AFTER_HALT = 20 + + +# As the defaults.conf file has a POWER_CYCLE option already defined, +# and options can not be defined in the same section more than once +# (all DEFAULTS sections are considered the same). We use the +# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previous defined +# options, for the options set in the OVERRIDE section. +# +DEFAULTS OVERRIDE + +# Instead of using the default POWER_CYCLE option defined in +# defaults.conf, we use virsh to cycle it. To do so, we destroy +# the guest, wait 5 seconds, and then start it up again. +# Crude, but effective. +# +POWER_CYCLE = ${VMWARE_POWER_CYCLE} + + +DEFAULTS + +# The following files each handle a different test case. +# Having them included allows you to set up more than one machine and share +# the same tests. +INCLUDE include/patchcheck.conf +INCLUDE include/tests.conf +INCLUDE include/bisect.conf +INCLUDE include/min-config.conf diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 4e2450964517..09d1578f9d66 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -24,7 +24,7 @@ my %evals; #default opts my %default = ( - "MAILER" => "sendmail", # default mailer + "MAILER" => "sendmail", # default mailer "EMAIL_ON_ERROR" => 1, "EMAIL_WHEN_FINISHED" => 1, "EMAIL_WHEN_CANCELED" => 0, @@ -36,15 +36,15 @@ my %default = ( "CLOSE_CONSOLE_SIGNAL" => "INT", "TIMEOUT" => 120, "TMP_DIR" => "/tmp/ktest/\${MACHINE}", - "SLEEP_TIME" => 60, # sleep time between tests + "SLEEP_TIME" => 60, # sleep time between tests "BUILD_NOCLEAN" => 0, "REBOOT_ON_ERROR" => 0, "POWEROFF_ON_ERROR" => 0, "REBOOT_ON_SUCCESS" => 1, "POWEROFF_ON_SUCCESS" => 0, "BUILD_OPTIONS" => "", - "BISECT_SLEEP_TIME" => 60, # sleep time between bisects - "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks + "BISECT_SLEEP_TIME" => 60, # sleep time between bisects + "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks "CLEAR_LOG" => 0, "BISECT_MANUAL" => 0, "BISECT_SKIP" => 1, @@ -512,6 +512,69 @@ $config_help{"REBOOT_SCRIPT"} = << "EOF" EOF ; +# used with process_expression() +my $d = 0; + +# defined before get_test_name() +my $in_die = 0; + +# defined before process_warning_line() +my $check_build_re = ".*:.*(warning|error|Error):.*"; +my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})"; + +# defined before child_finished() +my $child_done; + +# config_ignore holds the configs that were set (or unset) for +# a good config and we will ignore these configs for the rest +# of a config bisect. These configs stay as they were. +my %config_ignore; + +# config_set holds what all configs were set as. +my %config_set; + +# config_off holds the set of configs that the bad config had disabled. +# We need to record them and set them in the .config when running +# olddefconfig, because olddefconfig keeps the defaults. +my %config_off; + +# config_off_tmp holds a set of configs to turn off for now +my @config_off_tmp; + +# config_list is the set of configs that are being tested +my %config_list; +my %null_config; + +my %dependency; + +# found above run_config_bisect() +my $pass = 1; + +# found above add_dep() + +my %depends; +my %depcount; +my $iflevel = 0; +my @ifdeps; + +# prevent recursion +my %read_kconfigs; + +# found above test_this_config() +my %min_configs; +my %keep_configs; +my %save_configs; +my %processed_configs; +my %nochange_config; + +# +# These are first defined here, main function later on +# +sub run_command; +sub start_monitor; +sub end_monitor; +sub wait_for_monitor; + sub _logit { if (defined($opt{"LOG_FILE"})) { print LOG @_; @@ -537,7 +600,7 @@ sub read_prompt { my $ans; for (;;) { - if ($cancel) { + if ($cancel) { print "$prompt [y/n/C] "; } else { print "$prompt [Y/n] "; @@ -760,7 +823,7 @@ sub process_variables { # remove the space added in the beginning $retval =~ s/ //; - return "$retval" + return "$retval"; } sub set_value { @@ -863,7 +926,6 @@ sub value_defined { defined($opt{$2}); } -my $d = 0; sub process_expression { my ($name, $val) = @_; @@ -978,7 +1040,6 @@ sub __read_config { $override = 0; if ($type eq "TEST_START") { - if ($num_tests_set) { die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; } @@ -1048,7 +1109,6 @@ sub __read_config { $test_num = $old_test_num; $repeat = $old_repeat; } - } elsif (/^\s*ELSE\b(.*)$/) { if (!$if) { die "$name: $.: ELSE found with out matching IF section\n$_"; @@ -1095,7 +1155,7 @@ sub __read_config { } } } - + if ( ! -r $file ) { die "$name: $.: Can't read file $file\n$_"; } @@ -1186,13 +1246,13 @@ sub __read_config { } sub get_test_case { - print "What test case would you like to run?\n"; - print " (build, install or boot)\n"; - print " Other tests are available but require editing ktest.conf\n"; - print " (see tools/testing/ktest/sample.conf)\n"; - my $ans = <STDIN>; - chomp $ans; - $default{"TEST_TYPE"} = $ans; + print "What test case would you like to run?\n"; + print " (build, install or boot)\n"; + print " Other tests are available but require editing ktest.conf\n"; + print " (see tools/testing/ktest/sample.conf)\n"; + my $ans = <STDIN>; + chomp $ans; + $default{"TEST_TYPE"} = $ans; } sub read_config { @@ -1368,11 +1428,6 @@ sub eval_option { return $option; } -sub run_command; -sub start_monitor; -sub end_monitor; -sub wait_for_monitor; - sub reboot { my ($time) = @_; my $powercycle = 0; @@ -1457,8 +1512,6 @@ sub do_not_reboot { ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build"); } -my $in_die = 0; - sub get_test_name() { my $name; @@ -1471,7 +1524,6 @@ sub get_test_name() { } sub dodie { - # avoid recursion return if ($in_die); $in_die = 1; @@ -1481,10 +1533,8 @@ sub dodie { doprint "CRITICAL FAILURE... [TEST $i] ", @_, "\n"; if ($reboot_on_error && !do_not_reboot) { - doprint "REBOOTING\n"; reboot_to_good; - } elsif ($poweroff_on_error && defined($power_off)) { doprint "POWERING OFF\n"; `$power_off`; @@ -1519,13 +1569,14 @@ sub dodie { close O; close L; } - send_email("KTEST: critical failure for test $i [$name]", - "Your test started at $script_start_time has failed with:\n@_\n", $log_file); + + send_email("KTEST: critical failure for test $i [$name]", + "Your test started at $script_start_time has failed with:\n@_\n", $log_file); } if ($monitor_cnt) { - # restore terminal settings - system("stty $stty_orig"); + # restore terminal settings + system("stty $stty_orig"); } if (defined($post_test)) { @@ -1709,81 +1760,81 @@ sub wait_for_monitor { } sub save_logs { - my ($result, $basedir) = @_; - my @t = localtime; - my $date = sprintf "%04d%02d%02d%02d%02d%02d", - 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; + my ($result, $basedir) = @_; + my @t = localtime; + my $date = sprintf "%04d%02d%02d%02d%02d%02d", + 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; - my $type = $build_type; - if ($type =~ /useconfig/) { - $type = "useconfig"; - } + my $type = $build_type; + if ($type =~ /useconfig/) { + $type = "useconfig"; + } - my $dir = "$machine-$test_type-$type-$result-$date"; + my $dir = "$machine-$test_type-$type-$result-$date"; - $dir = "$basedir/$dir"; + $dir = "$basedir/$dir"; - if (!-d $dir) { - mkpath($dir) or - dodie "can't create $dir"; - } + if (!-d $dir) { + mkpath($dir) or + dodie "can't create $dir"; + } - my %files = ( - "config" => $output_config, - "buildlog" => $buildlog, - "dmesg" => $dmesg, - "testlog" => $testlog, - ); + my %files = ( + "config" => $output_config, + "buildlog" => $buildlog, + "dmesg" => $dmesg, + "testlog" => $testlog, + ); - while (my ($name, $source) = each(%files)) { - if (-f "$source") { - cp "$source", "$dir/$name" or - dodie "failed to copy $source"; - } + while (my ($name, $source) = each(%files)) { + if (-f "$source") { + cp "$source", "$dir/$name" or + dodie "failed to copy $source"; } + } - doprint "*** Saved info to $dir ***\n"; + doprint "*** Saved info to $dir ***\n"; } sub fail { - if ($die_on_failure) { - dodie @_; - } + if ($die_on_failure) { + dodie @_; + } - doprint "FAILED\n"; + doprint "FAILED\n"; - my $i = $iteration; + my $i = $iteration; - # no need to reboot for just building. - if (!do_not_reboot) { - doprint "REBOOTING\n"; - reboot_to_good $sleep_time; - } + # no need to reboot for just building. + if (!do_not_reboot) { + doprint "REBOOTING\n"; + reboot_to_good $sleep_time; + } - my $name = ""; + my $name = ""; - if (defined($test_name)) { - $name = " ($test_name)"; - } + if (defined($test_name)) { + $name = " ($test_name)"; + } - print_times; + print_times; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - if (defined($store_failures)) { - save_logs "fail", $store_failures; - } + if (defined($store_failures)) { + save_logs "fail", $store_failures; + } - if (defined($post_test)) { - run_command $post_test; - } + if (defined($post_test)) { + run_command $post_test; + } - return 1; + return 1; } sub run_command { @@ -1915,8 +1966,8 @@ sub _get_grub_index { my ($command, $target, $skip) = @_; return if (defined($grub_number) && defined($last_grub_menu) && - $last_grub_menu eq $grub_menu && defined($last_machine) && - $last_machine eq $machine); + $last_grub_menu eq $grub_menu && defined($last_machine) && + $last_machine eq $machine); doprint "Find $reboot_type menu ... "; $grub_number = -1; @@ -1924,8 +1975,8 @@ sub _get_grub_index { my $ssh_grub = $ssh_exec; $ssh_grub =~ s,\$SSH_COMMAND,$command,g; - open(IN, "$ssh_grub |") - or dodie "unable to execute $command"; + open(IN, "$ssh_grub |") or + dodie "unable to execute $command"; my $found = 0; @@ -1969,9 +2020,9 @@ sub get_grub_index { $target = '^menuentry.*' . $grub_menu_qt; $skip = '^menuentry\s|^submenu\s'; } elsif ($reboot_type eq "grub2bls") { - $command = $grub_bls_get; - $target = '^title=.*' . $grub_menu_qt; - $skip = '^title='; + $command = $grub_bls_get; + $target = '^title=.*' . $grub_menu_qt; + $skip = '^title='; } else { return; } @@ -1979,8 +2030,7 @@ sub get_grub_index { _get_grub_index($command, $target, $skip); } -sub wait_for_input -{ +sub wait_for_input { my ($fp, $time) = @_; my $start_time; my $rin; @@ -2096,7 +2146,6 @@ sub monitor { my $version_found = 0; while (!$done) { - if ($bug && defined($stop_after_failure) && $stop_after_failure >= 0) { my $time = $stop_after_failure - (time - $failure_start); @@ -2349,9 +2398,6 @@ sub start_monitor_and_install { return monitor; } -my $check_build_re = ".*:.*(warning|error|Error):.*"; -my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})"; - sub process_warning_line { my ($line) = @_; @@ -2394,7 +2440,7 @@ sub check_buildlog { while (<IN>) { if (/$check_build_re/) { my $warning = process_warning_line $_; - + $warnings_list{$warning} = 1; } } @@ -2571,7 +2617,6 @@ sub build { run_command "mv $outputdir/config_temp $output_config" or dodie "moving config_temp"; } - } elsif (!$noclean) { unlink "$output_config"; run_command "$make mrproper" or @@ -2594,6 +2639,9 @@ sub build { # Run old config regardless, to enforce min configurations make_oldconfig; + if (not defined($build_options)){ + $build_options = ""; + } my $build_ret = run_command "$make $build_options", $buildlog; if (defined($post_build)) { @@ -2649,14 +2697,15 @@ sub success { print_times; - doprint "\n\n*******************************************\n"; - doprint "*******************************************\n"; - doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n"; - doprint "*******************************************\n"; - doprint "*******************************************\n"; + doprint "\n\n"; + doprint "*******************************************\n"; + doprint "*******************************************\n"; + doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n"; + doprint "*******************************************\n"; + doprint "*******************************************\n"; if (defined($store_successes)) { - save_logs "success", $store_successes; + save_logs "success", $store_successes; } if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) { @@ -2698,8 +2747,6 @@ sub child_run_test { exit $run_command_status; } -my $child_done; - sub child_finished { $child_done = 1; } @@ -3031,7 +3078,6 @@ sub bisect { } if ($do_check) { - # get current HEAD my $head = get_sha1("HEAD"); @@ -3071,13 +3117,11 @@ sub bisect { run_command "git bisect replay $replay" or dodie "failed to run replay"; } else { - run_command "git bisect good $good" or dodie "could not set bisect good to $good"; run_git_bisect "git bisect bad $bad" or dodie "could not set bisect bad to $bad"; - } if (defined($start)) { @@ -3103,35 +3147,13 @@ sub bisect { success $i; } -# config_ignore holds the configs that were set (or unset) for -# a good config and we will ignore these configs for the rest -# of a config bisect. These configs stay as they were. -my %config_ignore; - -# config_set holds what all configs were set as. -my %config_set; - -# config_off holds the set of configs that the bad config had disabled. -# We need to record them and set them in the .config when running -# olddefconfig, because olddefconfig keeps the defaults. -my %config_off; - -# config_off_tmp holds a set of configs to turn off for now -my @config_off_tmp; - -# config_list is the set of configs that are being tested -my %config_list; -my %null_config; - -my %dependency; - sub assign_configs { my ($hash, $config) = @_; doprint "Reading configs from $config\n"; - open (IN, $config) - or dodie "Failed to read $config"; + open (IN, $config) or + dodie "Failed to read $config"; while (<IN>) { chomp; @@ -3219,8 +3241,6 @@ sub config_bisect_end { doprint "***************************************\n\n"; } -my $pass = 1; - sub run_config_bisect { my ($good, $bad, $last_result) = @_; my $reset = ""; @@ -3243,13 +3263,13 @@ sub run_config_bisect { $ret = run_config_bisect_test $config_bisect_type; if ($ret) { - doprint "NEW GOOD CONFIG ($pass)\n"; + doprint "NEW GOOD CONFIG ($pass)\n"; system("cp $output_config $tmpdir/good_config.tmp.$pass"); $pass++; # Return 3 for good config return 3; } else { - doprint "NEW BAD CONFIG ($pass)\n"; + doprint "NEW BAD CONFIG ($pass)\n"; system("cp $output_config $tmpdir/bad_config.tmp.$pass"); $pass++; # Return 4 for bad config @@ -3284,10 +3304,11 @@ sub config_bisect { if (!defined($config_bisect_exec)) { # First check the location that ktest.pl ran - my @locations = ( "$pwd/config-bisect.pl", - "$dirname/config-bisect.pl", - "$builddir/tools/testing/ktest/config-bisect.pl", - undef ); + my @locations = ( + "$pwd/config-bisect.pl", + "$dirname/config-bisect.pl", + "$builddir/tools/testing/ktest/config-bisect.pl", + undef ); foreach my $loc (@locations) { doprint "loc = $loc\n"; $config_bisect_exec = $loc; @@ -3368,7 +3389,7 @@ sub config_bisect { } while ($ret == 3 || $ret == 4); if ($ret == 2) { - config_bisect_end "$good_config.tmp", "$bad_config.tmp"; + config_bisect_end "$good_config.tmp", "$bad_config.tmp"; } return $ret if ($ret < 0); @@ -3511,14 +3532,6 @@ sub patchcheck { return 1; } -my %depends; -my %depcount; -my $iflevel = 0; -my @ifdeps; - -# prevent recursion -my %read_kconfigs; - sub add_dep { # $config depends on $dep my ($config, $dep) = @_; @@ -3548,7 +3561,6 @@ sub read_kconfig { my $cont = 0; my $line; - if (! -f $kconfig) { doprint "file $kconfig does not exist, skipping\n"; return; @@ -3630,8 +3642,8 @@ sub read_kconfig { sub read_depends { # find out which arch this is by the kconfig file - open (IN, $output_config) - or dodie "Failed to read $output_config"; + open (IN, $output_config) or + dodie "Failed to read $output_config"; my $arch; while (<IN>) { if (m,Linux/(\S+)\s+\S+\s+Kernel Configuration,) { @@ -3657,7 +3669,7 @@ sub read_depends { if (! -f $kconfig && $arch =~ /\d$/) { my $orig = $arch; - # some subarchs have numbers, truncate them + # some subarchs have numbers, truncate them $arch =~ s/\d*$//; $kconfig = "$builddir/arch/$arch/Kconfig"; if (! -f $kconfig) { @@ -3706,7 +3718,6 @@ sub get_depends { my @configs; while ($dep =~ /[$valid]/) { - if ($dep =~ /^[^$valid]*([$valid]+)/) { my $conf = "CONFIG_" . $1; @@ -3721,12 +3732,6 @@ sub get_depends { return @configs; } -my %min_configs; -my %keep_configs; -my %save_configs; -my %processed_configs; -my %nochange_config; - sub test_this_config { my ($config) = @_; @@ -3852,7 +3857,7 @@ sub make_min_config { foreach my $config (@config_keys) { my $kconfig = chomp_config $config; if (!defined $depcount{$kconfig}) { - $depcount{$kconfig} = 0; + $depcount{$kconfig} = 0; } } @@ -3887,7 +3892,6 @@ sub make_min_config { my $take_two = 0; while (!$done) { - my $config; my $found; @@ -3898,7 +3902,7 @@ sub make_min_config { # Sort keys by who is most dependent on @test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} } - @test_configs ; + @test_configs ; # Put configs that did not modify the config at the end. my $reset = 1; @@ -3954,13 +3958,13 @@ sub make_min_config { my $failed = 0; build "oldconfig" or $failed = 1; if (!$failed) { - start_monitor_and_install or $failed = 1; + start_monitor_and_install or $failed = 1; - if ($type eq "test" && !$failed) { - do_run_test or $failed = 1; - } + if ($type eq "test" && !$failed) { + do_run_test or $failed = 1; + } - end_monitor; + end_monitor; } $in_bisect = 0; @@ -3974,8 +3978,8 @@ sub make_min_config { # update new ignore configs if (defined($ignore_config)) { - open (OUT, ">$temp_config") - or dodie "Can't write to $temp_config"; + open (OUT, ">$temp_config") or + dodie "Can't write to $temp_config"; foreach my $config (keys %save_configs) { print OUT "$save_configs{$config}\n"; } @@ -4002,8 +4006,8 @@ sub make_min_config { } # Save off all the current mandatory configs - open (OUT, ">$temp_config") - or dodie "Can't write to $temp_config"; + open (OUT, ">$temp_config") or + dodie "Can't write to $temp_config"; foreach my $config (keys %keep_configs) { print OUT "$keep_configs{$config}\n"; } @@ -4041,7 +4045,6 @@ sub make_warnings_file { open(IN, $buildlog) or dodie "Can't open $buildlog"; while (<IN>) { - # Some compilers use UTF-8 extended for quotes # for distcc heterogeneous systems, this causes issues s/$utf8_quote/'/g; @@ -4057,98 +4060,6 @@ sub make_warnings_file { success $i; } -$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; - -if ($#ARGV == 0) { - $ktest_config = $ARGV[0]; - if (! -f $ktest_config) { - print "$ktest_config does not exist.\n"; - if (!read_yn "Create it?") { - exit 0; - } - } -} - -if (! -f $ktest_config) { - $newconfig = 1; - get_test_case; - open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; - print OUT << "EOF" -# Generated by ktest.pl -# - -# PWD is a ktest.pl variable that will result in the process working -# directory that ktest.pl is executed in. - -# THIS_DIR is automatically assigned the PWD of the path that generated -# the config file. It is best to use this variable when assigning other -# directory paths within this directory. This allows you to easily -# move the test cases to other locations or to other machines. -# -THIS_DIR := $variable{"PWD"} - -# Define each test with TEST_START -# The config options below it will override the defaults -TEST_START -TEST_TYPE = $default{"TEST_TYPE"} - -DEFAULTS -EOF -; - close(OUT); -} -read_config $ktest_config; - -if (defined($opt{"LOG_FILE"})) { - $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1); -} - -# Append any configs entered in manually to the config file. -my @new_configs = keys %entered_configs; -if ($#new_configs >= 0) { - print "\nAppending entered in configs to $ktest_config\n"; - open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; - foreach my $config (@new_configs) { - print OUT "$config = $entered_configs{$config}\n"; - $opt{$config} = process_variables($entered_configs{$config}); - } -} - -if (defined($opt{"LOG_FILE"})) { - if ($opt{"CLEAR_LOG"}) { - unlink $opt{"LOG_FILE"}; - } - open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; - LOG->autoflush(1); -} - -doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; - -for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) { - - if (!$i) { - doprint "DEFAULT OPTIONS:\n"; - } else { - doprint "\nTEST $i OPTIONS"; - if (defined($repeat_tests{$i})) { - $repeat = $repeat_tests{$i}; - doprint " ITERATE $repeat"; - } - doprint "\n"; - } - - foreach my $option (sort keys %opt) { - - if ($option =~ /\[(\d+)\]$/) { - next if ($i != $1); - } else { - next if ($i); - } - - doprint "$option = $opt{$option}\n"; - } -} - sub option_defined { my ($option) = @_; @@ -4261,7 +4172,6 @@ sub do_send_mail { } sub send_email { - if (defined($mailto)) { if (!defined($mailer)) { doprint "No email sent: email or mailer not specified in config.\n"; @@ -4274,12 +4184,103 @@ sub send_email { sub cancel_test { if ($email_when_canceled) { my $name = get_test_name; - send_email("KTEST: Your [$name] test was cancelled", - "Your test started at $script_start_time was cancelled: sig int"); + send_email("KTEST: Your [$name] test was cancelled", + "Your test started at $script_start_time was cancelled: sig int"); } die "\nCaught Sig Int, test interrupted: $!\n" } +$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; + +if ($#ARGV == 0) { + $ktest_config = $ARGV[0]; + if (! -f $ktest_config) { + print "$ktest_config does not exist.\n"; + if (!read_yn "Create it?") { + exit 0; + } + } +} + +if (! -f $ktest_config) { + $newconfig = 1; + get_test_case; + open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; + print OUT << "EOF" +# Generated by ktest.pl +# + +# PWD is a ktest.pl variable that will result in the process working +# directory that ktest.pl is executed in. + +# THIS_DIR is automatically assigned the PWD of the path that generated +# the config file. It is best to use this variable when assigning other +# directory paths within this directory. This allows you to easily +# move the test cases to other locations or to other machines. +# +THIS_DIR := $variable{"PWD"} + +# Define each test with TEST_START +# The config options below it will override the defaults +TEST_START +TEST_TYPE = $default{"TEST_TYPE"} + +DEFAULTS +EOF +; + close(OUT); +} +read_config $ktest_config; + +if (defined($opt{"LOG_FILE"})) { + $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1); +} + +# Append any configs entered in manually to the config file. +my @new_configs = keys %entered_configs; +if ($#new_configs >= 0) { + print "\nAppending entered in configs to $ktest_config\n"; + open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; + foreach my $config (@new_configs) { + print OUT "$config = $entered_configs{$config}\n"; + $opt{$config} = process_variables($entered_configs{$config}); + } +} + +if (defined($opt{"LOG_FILE"})) { + if ($opt{"CLEAR_LOG"}) { + unlink $opt{"LOG_FILE"}; + } + open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; + LOG->autoflush(1); +} + +doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; + +for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) { + + if (!$i) { + doprint "DEFAULT OPTIONS:\n"; + } else { + doprint "\nTEST $i OPTIONS"; + if (defined($repeat_tests{$i})) { + $repeat = $repeat_tests{$i}; + doprint " ITERATE $repeat"; + } + doprint "\n"; + } + + foreach my $option (sort keys %opt) { + if ($option =~ /\[(\d+)\]$/) { + next if ($i != $1); + } else { + next if ($i); + } + + doprint "$option = $opt{$option}\n"; + } +} + $SIG{INT} = qw(cancel_test); # First we need to do is the builds @@ -4323,15 +4324,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { # The first test may override the PRE_KTEST option if ($i == 1) { - if (defined($pre_ktest)) { - doprint "\n"; - run_command $pre_ktest; - } - if ($email_when_started) { + if (defined($pre_ktest)) { + doprint "\n"; + run_command $pre_ktest; + } + if ($email_when_started) { my $name = get_test_name; - send_email("KTEST: Your [$name] test was started", - "Your test was started on $script_start_time"); - } + send_email("KTEST: Your [$name] test was started", + "Your test was started on $script_start_time"); + } } # Any test can override the POST_KTEST option @@ -4409,7 +4410,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { my $ret = run_command $pre_test; if (!$ret && defined($pre_test_die) && $pre_test_die) { - dodie "failed to pre_test\n"; + dodie "failed to pre_test\n"; } } @@ -4503,12 +4504,11 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) { run_command $switch_to_good; } - doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; if ($email_when_finished) { send_email("KTEST: Your test has finished!", - "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); + "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); } if (defined($opt{"LOG_FILE"})) { @@ -4517,3 +4517,12 @@ if (defined($opt{"LOG_FILE"})) { } exit 0; + +## +# The following are here to standardize tabs/spaces/etc across the most likely editors +### + +# Local Variables: +# mode: perl +# End: +# vim: softtabstop=4 diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config index a7f0603d33f6..690870043ac0 100644 --- a/tools/testing/kunit/configs/broken_on_uml.config +++ b/tools/testing/kunit/configs/broken_on_uml.config @@ -40,3 +40,5 @@ # CONFIG_RESET_BRCMSTB_RESCAL is not set # CONFIG_RESET_INTEL_GW is not set # CONFIG_ADI_AXI_ADC is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_PAGE_POISONING is not set diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index d5144fcb03ac..5da8fb3762f9 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -184,7 +184,9 @@ def add_common_opts(parser) -> None: help='Run all KUnit tests through allyesconfig', action='store_true') parser.add_argument('--kunitconfig', - help='Path to Kconfig fragment that enables KUnit tests', + help='Path to Kconfig fragment that enables KUnit tests.' + ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" ' + 'will get automatically appended.', metavar='kunitconfig') def add_build_opts(parser) -> None: diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 0b550cbd667d..1e2683dcc0e7 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -13,7 +13,7 @@ from typing import List, Set CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' -KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value']) +KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value']) class KconfigEntry(KconfigEntryBase): diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index f309a33256cd..89a7d4024e87 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -132,6 +132,8 @@ class LinuxSourceTree(object): return if kunitconfig_path: + if os.path.isdir(kunitconfig_path): + kunitconfig_path = os.path.join(kunitconfig_path, KUNITCONFIG_PATH) if not os.path.exists(kunitconfig_path): raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist') else: diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 1ad3049e9069..2e809dd956a7 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -251,6 +251,12 @@ class LinuxSourceTreeTest(unittest.TestCase): with tempfile.NamedTemporaryFile('wt') as kunitconfig: tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) + def test_dir_kunitconfig(self): + with tempfile.TemporaryDirectory('') as dir: + with open(os.path.join(dir, '.kunitconfig'), 'w') as f: + pass + tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir) + # TODO: add more test cases. diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 3b796dd5e577..ca24f6839d50 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -296,21 +296,34 @@ static void *idr_throbber(void *arg) return NULL; } +/* + * There are always either 1 or 2 objects in the IDR. If we find nothing, + * or we find something at an ID we didn't expect, that's a bug. + */ void idr_find_test_1(int anchor_id, int throbber_id) { pthread_t throbber; time_t start = time(NULL); - pthread_create(&throbber, NULL, idr_throbber, &throbber_id); - BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id, anchor_id + 1, GFP_KERNEL) != anchor_id); + pthread_create(&throbber, NULL, idr_throbber, &throbber_id); + + rcu_read_lock(); do { int id = 0; void *entry = idr_get_next(&find_idr, &id); - BUG_ON(entry != xa_mk_value(id)); + rcu_read_unlock(); + if ((id != anchor_id && id != throbber_id) || + entry != xa_mk_value(id)) { + printf("%s(%d, %d): %p at %d\n", __func__, anchor_id, + throbber_id, entry, id); + abort(); + } + rcu_read_lock(); } while (time(NULL) < start + 11); + rcu_read_unlock(); pthread_join(throbber, NULL); @@ -577,6 +590,7 @@ void ida_tests(void) int __weak main(void) { + rcu_register_thread(); radix_tree_init(); idr_checks(); ida_tests(); @@ -584,5 +598,6 @@ int __weak main(void) rcu_barrier(); if (nr_allocated) printf("nr_allocated = %d\n", nr_allocated); + rcu_unregister_thread(); return 0; } diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/radix-tree/linux/compiler_types.h +++ /dev/null diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 9eae0fb5a67d..e00520cc6349 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -224,7 +224,9 @@ void multiorder_checks(void) int __weak main(void) { + rcu_register_thread(); radix_tree_init(); multiorder_checks(); + rcu_unregister_thread(); return 0; } diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c index e61e43efe463..f20e12cbbfd4 100644 --- a/tools/testing/radix-tree/xarray.c +++ b/tools/testing/radix-tree/xarray.c @@ -25,11 +25,13 @@ void xarray_tests(void) int __weak main(void) { + rcu_register_thread(); radix_tree_init(); xarray_tests(); radix_tree_cpu_dead(1); rcu_barrier(); if (nr_allocated) printf("nr_allocated = %d\n", nr_allocated); + rcu_unregister_thread(); return 0; } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 6c575cf34a71..bc3299a20338 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -25,6 +25,7 @@ TARGETS += ir TARGETS += kcmp TARGETS += kexec TARGETS += kvm +TARGETS += landlock TARGETS += lib TARGETS += livepatch TARGETS += lkdtm diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 2c9d012797a7..ced910fb4019 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore new file mode 100644 index 000000000000..73869fabada4 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/.gitignore @@ -0,0 +1,2 @@ +btitest +nobtitest diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile new file mode 100644 index 000000000000..73e013c082a6 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := btitest nobtitest + +PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS)) + +# These tests are built as freestanding binaries since otherwise BTI +# support in ld.so is required which is not currently widespread; when +# it is available it will still be useful to test this separately as the +# cases for statically linked and dynamically lined binaries are +# slightly different. + +CFLAGS_NOBTI = -DBTI=0 +CFLAGS_BTI = -mbranch-protection=standard -DBTI=1 + +CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS) + +BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $< +NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $< + +%-bti.o: %.c + $(BTI_CC_COMMAND) + +%-bti.o: %.S + $(BTI_CC_COMMAND) + +%-nobti.o: %.c + $(NOBTI_CC_COMMAND) + +%-nobti.o: %.S + $(NOBTI_CC_COMMAND) + +BTI_OBJS = \ + test-bti.o \ + signal-bti.o \ + start-bti.o \ + syscall-bti.o \ + system-bti.o \ + teststubs-bti.o \ + trampoline-bti.o +gen/btitest: $(BTI_OBJS) + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + +NOBTI_OBJS = \ + test-nobti.o \ + signal-nobti.o \ + start-nobti.o \ + syscall-nobti.o \ + system-nobti.o \ + teststubs-nobti.o \ + trampoline-nobti.o +gen/nobtitest: $(NOBTI_OBJS) + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + +# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk + +$(TEST_GEN_PROGS): $(PROGS) + cp $(PROGS) $(OUTPUT)/ diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h new file mode 100644 index 000000000000..04e7b72880ef --- /dev/null +++ b/tools/testing/selftests/arm64/bti/assembler.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) + + +.macro startfn name:req + .globl \name +\name: + .macro endfn + .size \name, . - \name + .type \name, @function + .purgem endfn + .endm +.endm + +.macro emit_aarch64_feature_1_and + .pushsection .note.gnu.property, "a" + .align 3 + .long 2f - 1f + .long 6f - 3f + .long NT_GNU_PROPERTY_TYPE_0 +1: .string "GNU" +2: + .align 3 +3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND + .long 5f - 4f +4: +#if BTI + .long GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \ + GNU_PROPERTY_AARCH64_FEATURE_1_BTI +#else + .long 0 +#endif +5: + .align 3 +6: + .popsection +.endm + +.macro paciasp + hint 0x19 +.endm + +.macro autiasp + hint 0x1d +.endm + +.macro __bti_ + hint 0x20 +.endm + +.macro __bti_c + hint 0x22 +.endm + +.macro __bti_j + hint 0x24 +.endm + +.macro __bti_jc + hint 0x26 +.endm + +.macro bti what= + __bti_\what +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h new file mode 100644 index 000000000000..2aff9b10336e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/btitest.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef BTITEST_H +#define BTITEST_H + +/* Trampolines for calling the test stubs: */ +void call_using_br_x0(void (*)(void)); +void call_using_br_x16(void (*)(void)); +void call_using_blr(void (*)(void)); + +/* Test stubs: */ +void nohint_func(void); +void bti_none_func(void); +void bti_c_func(void); +void bti_j_func(void); +void bti_jc_func(void); +void paciasp_func(void); + +#endif /* !BTITEST_H */ diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h new file mode 100644 index 000000000000..ebb6204f447a --- /dev/null +++ b/tools/testing/selftests/arm64/bti/compiler.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef COMPILER_H +#define COMPILER_H + +#define __always_unused __attribute__((__unused__)) +#define __noreturn __attribute__((__noreturn__)) +#define __unreachable() __builtin_unreachable() + +/* curse(e) has value e, but the compiler cannot assume so */ +#define curse(e) ({ \ + __typeof__(e) __curse_e = (e); \ + asm ("" : "+r" (__curse_e)); \ + __curse_e; \ +}) + +#endif /* ! COMPILER_H */ diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore new file mode 100644 index 000000000000..73869fabada4 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/gen/.gitignore @@ -0,0 +1,2 @@ +btitest +nobtitest diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c new file mode 100644 index 000000000000..f3fd29b91141 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/signal.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "system.h" +#include "signal.h" + +int sigemptyset(sigset_t *s) +{ + unsigned int i; + + for (i = 0; i < _NSIG_WORDS; ++i) + s->sig[i] = 0; + + return 0; +} + +int sigaddset(sigset_t *s, int n) +{ + if (n < 1 || n > _NSIG) + return -EINVAL; + + s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW; + return 0; +} + +int sigaction(int n, struct sigaction *sa, const struct sigaction *old) +{ + return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask)); +} + +int sigprocmask(int how, const sigset_t *mask, sigset_t *old) +{ + return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask)); +} diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h new file mode 100644 index 000000000000..103457dc880e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/signal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef SIGNAL_H +#define SIGNAL_H + +#include <linux/signal.h> + +#include "system.h" + +typedef __sighandler_t sighandler_t; + +int sigemptyset(sigset_t *s); +int sigaddset(sigset_t *s, int n); +int sigaction(int n, struct sigaction *sa, const struct sigaction *old); +int sigprocmask(int how, const sigset_t *mask, sigset_t *old); + +#endif /* ! SIGNAL_H */ diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S new file mode 100644 index 000000000000..831f952e0572 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/start.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "assembler.h" + +startfn _start + mov x0, sp + b start +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S new file mode 100644 index 000000000000..8dde8b6f3db1 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/syscall.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "assembler.h" + +startfn syscall + bti c + mov w8, w0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + mov x4, x5 + mov x5, x6 + mov x6, x7 + svc #0 + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c new file mode 100644 index 000000000000..6385d8d4973b --- /dev/null +++ b/tools/testing/selftests/arm64/bti/system.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "system.h" + +#include <asm/unistd.h> + +#include "compiler.h" + +void __noreturn exit(int n) +{ + syscall(__NR_exit, n); + __unreachable(); +} + +ssize_t write(int fd, const void *buf, size_t size) +{ + return syscall(__NR_write, fd, buf, size); +} diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h new file mode 100644 index 000000000000..aca118589705 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/system.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#ifndef SYSTEM_H +#define SYSTEM_H + +#include <linux/types.h> +#include <linux/stddef.h> + +typedef __kernel_size_t size_t; +typedef __kernel_ssize_t ssize_t; + +#include <linux/errno.h> +#include <asm/hwcap.h> +#include <asm/ptrace.h> +#include <asm/unistd.h> + +#include "compiler.h" + +long syscall(int nr, ...); + +void __noreturn exit(int n); +ssize_t write(int fd, const void *buf, size_t size); + +#endif /* ! SYSTEM_H */ diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c new file mode 100644 index 000000000000..656b04976ccc --- /dev/null +++ b/tools/testing/selftests/arm64/bti/test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019,2021 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "system.h" + +#include <linux/errno.h> +#include <linux/auxvec.h> +#include <linux/signal.h> +#include <asm/sigcontext.h> +#include <asm/ucontext.h> + +typedef struct ucontext ucontext_t; + +#include "btitest.h" +#include "compiler.h" +#include "signal.h" + +#define EXPECTED_TESTS 18 + +static volatile unsigned int test_num = 1; +static unsigned int test_passed; +static unsigned int test_failed; +static unsigned int test_skipped; + +static void fdputs(int fd, const char *str) +{ + size_t len = 0; + const char *p = str; + + while (*p++) + ++len; + + write(fd, str, len); +} + +static void putstr(const char *str) +{ + fdputs(1, str); +} + +static void putnum(unsigned int num) +{ + char c; + + if (num / 10) + putnum(num / 10); + + c = '0' + (num % 10); + write(1, &c, 1); +} + +#define puttestname(test_name, trampoline_name) do { \ + putstr(test_name); \ + putstr("/"); \ + putstr(trampoline_name); \ +} while (0) + +void print_summary(void) +{ + putstr("# Totals: pass:"); + putnum(test_passed); + putstr(" fail:"); + putnum(test_failed); + putstr(" xfail:0 xpass:0 skip:"); + putnum(test_skipped); + putstr(" error:0\n"); +} + +static const char *volatile current_test_name; +static const char *volatile current_trampoline_name; +static volatile int sigill_expected, sigill_received; + +static void handler(int n, siginfo_t *si __always_unused, + void *uc_ __always_unused) +{ + ucontext_t *uc = uc_; + + putstr("# \t[SIGILL in "); + puttestname(current_test_name, current_trampoline_name); + putstr(", BTYPE="); + write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK) + >> PSR_BTYPE_SHIFT) * 2], 2); + if (!sigill_expected) { + putstr("]\n"); + putstr("not ok "); + putnum(test_num); + putstr(" "); + puttestname(current_test_name, current_trampoline_name); + putstr("(unexpected SIGILL)\n"); + print_summary(); + exit(128 + n); + } + + putstr(" (expected)]\n"); + sigill_received = 1; + /* zap BTYPE so that resuming the faulting code will work */ + uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK; +} + +static int skip_all; + +static void __do_test(void (*trampoline)(void (*)(void)), + void (*fn)(void), + const char *trampoline_name, + const char *name, + int expect_sigill) +{ + if (skip_all) { + test_skipped++; + putstr("ok "); + putnum(test_num); + putstr(" "); + puttestname(name, trampoline_name); + putstr(" # SKIP\n"); + + return; + } + + /* Branch Target exceptions should only happen in BTI binaries: */ + if (!BTI) + expect_sigill = 0; + + sigill_expected = expect_sigill; + sigill_received = 0; + current_test_name = name; + current_trampoline_name = trampoline_name; + + trampoline(fn); + + if (expect_sigill && !sigill_received) { + putstr("not ok "); + test_failed++; + } else { + putstr("ok "); + test_passed++; + } + putnum(test_num++); + putstr(" "); + puttestname(name, trampoline_name); + putstr("\n"); +} + +#define do_test(expect_sigill_br_x0, \ + expect_sigill_br_x16, \ + expect_sigill_blr, \ + name) \ +do { \ + __do_test(call_using_br_x0, name, "call_using_br_x0", #name, \ + expect_sigill_br_x0); \ + __do_test(call_using_br_x16, name, "call_using_br_x16", #name, \ + expect_sigill_br_x16); \ + __do_test(call_using_blr, name, "call_using_blr", #name, \ + expect_sigill_blr); \ +} while (0) + +void start(int *argcp) +{ + struct sigaction sa; + void *const *p; + const struct auxv_entry { + unsigned long type; + unsigned long val; + } *auxv; + unsigned long hwcap = 0, hwcap2 = 0; + + putstr("TAP version 13\n"); + putstr("1.."); + putnum(EXPECTED_TESTS); + putstr("\n"); + + /* Gross hack for finding AT_HWCAP2 from the initial process stack: */ + p = (void *const *)argcp + 1 + *argcp + 1; /* start of environment */ + /* step over environment */ + while (*p++) + ; + for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) { + switch (auxv->type) { + case AT_HWCAP: + hwcap = auxv->val; + break; + case AT_HWCAP2: + hwcap2 = auxv->val; + break; + default: + break; + } + } + + if (hwcap & HWCAP_PACA) + putstr("# HWCAP_PACA present\n"); + else + putstr("# HWCAP_PACA not present\n"); + + if (hwcap2 & HWCAP2_BTI) { + putstr("# HWCAP2_BTI present\n"); + if (!(hwcap & HWCAP_PACA)) + putstr("# Bad hardware? Expect problems.\n"); + } else { + putstr("# HWCAP2_BTI not present\n"); + skip_all = 1; + } + + putstr("# Test binary"); + if (!BTI) + putstr(" not"); + putstr(" built for BTI\n"); + + sa.sa_handler = (sighandler_t)(void *)handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(SIGILL, &sa, NULL); + sigaddset(&sa.sa_mask, SIGILL); + sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL); + + do_test(1, 1, 1, nohint_func); + do_test(1, 1, 1, bti_none_func); + do_test(1, 0, 0, bti_c_func); + do_test(0, 0, 1, bti_j_func); + do_test(0, 0, 0, bti_jc_func); + do_test(1, 0, 0, paciasp_func); + + print_summary(); + + if (test_num - 1 != EXPECTED_TESTS) + putstr("# WARNING - EXPECTED TEST COUNT WRONG\n"); + + if (test_failed) + exit(1); + else + exit(0); +} diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S new file mode 100644 index 000000000000..b62c8c35f67e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/teststubs.S @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "assembler.h" + +startfn bti_none_func + bti + ret +endfn + +startfn bti_c_func + bti c + ret +endfn + +startfn bti_j_func + bti j + ret +endfn + +startfn bti_jc_func + bti jc + ret +endfn + +startfn paciasp_func + paciasp + autiasp + ret +endfn + +startfn nohint_func + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S new file mode 100644 index 000000000000..09beb3f361f1 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/trampoline.S @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin <Dave.Martin@arm.com> + */ + +#include "assembler.h" + +startfn call_using_br_x0 + bti c + br x0 +endfn + +startfn call_using_br_x16 + bti c + mov x16, x0 + br x16 +endfn + +startfn call_using_blr + paciasp + stp x29, x30, [sp, #-16]! + blr x0 + ldp x29, x30, [sp], #16 + autiasp + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 9210691aa998..e3e08d9c7020 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -284,16 +284,28 @@ endfunction // Set up test pattern in the FFR // x0: pid // x2: generation +// +// We need to generate a canonical FFR value, which consists of a number of +// low "1" bits, followed by a number of zeros. This gives us 17 unique values +// per 16 bits of FFR, so we create a 4 bit signature out of the PID and +// generation, and use that as the initial number of ones in the pattern. +// We fill the upper lanes of FFR with zeros. // Beware: corrupts P0. function setup_ffr mov x4, x30 - bl pattern + and w0, w0, #0x3 + bfi w0, w2, #2, #2 + mov w1, #1 + lsl w1, w1, w0 + sub w1, w1, #1 + ldr x0, =ffrref - ldr x1, =scratch - rdvl x2, #1 - lsr x2, x2, #3 - bl memcpy + strh w1, [x0], 2 + rdvl x1, #1 + lsr x1, x1, #3 + sub x1, x1, #2 + bl memclr mov x0, #0 ldr x1, =ffrref diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index 0b3af552632a..409e3e53d00a 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -1,14 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 ARM Limited -CFLAGS += -std=gnu99 -I. -lpthread +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + +CFLAGS += -std=gnu99 -I. -pthread +LDFLAGS += -pthread SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) PROGS := $(patsubst %.c,%,$(SRCS)) #Add mte compiler option -ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),) CFLAGS += -march=armv8.5-a+memtag -endif #check if the compiler works well mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) @@ -19,11 +23,14 @@ TEST_GEN_PROGS := $(PROGS) # Get Kernel headers installed and use them. KSFT_KHDR_INSTALL := 1 +else + $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.) + $(warning test program "mte" will not be created.) endif # Include KSFT lib.mk. include ../../lib.mk ifeq ($(mte_cc_support),1) -$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S +$(TEST_GEN_PROGS): mte_common_util.c mte_helper.S endif diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index 3b23c4d61d38..88c74bc46d4f 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -33,7 +33,10 @@ static unsigned long read_sysfs(char *str) ksft_print_msg("ERR: missing %s\n", str); return 0; } - fscanf(f, "%lu", &val); + if (fscanf(f, "%lu", &val) != 1) { + ksft_print_msg("ERR: parsing %s\n", str); + val = 0; + } fclose(f); return val; } diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 4bfa80f2a8c3..1de7a0abd0ae 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -33,7 +33,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping) if (fd == -1) return KSFT_FAIL; for (i = 0; i < len; i++) - write(fd, &val, sizeof(val)); + if (write(fd, &val, sizeof(val)) != sizeof(val)) + return KSFT_FAIL; lseek(fd, 0, 0); ptr = mte_allocate_memory(len, mem_type, mapping, true); if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) { diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 39f8908988ea..f50ac31920d1 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -181,10 +181,17 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags } /* Initialize the file for mappable size */ lseek(fd, 0, SEEK_SET); - for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) - write(fd, buffer, INIT_BUFFER_SIZE); + for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) { + if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { + perror("initialising buffer"); + return NULL; + } + } index -= INIT_BUFFER_SIZE; - write(fd, buffer, size - index); + if (write(fd, buffer, size - index) != size - index) { + perror("initialising buffer"); + return NULL; + } return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); } @@ -202,9 +209,15 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, /* Initialize the file for mappable size */ lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) - write(fd, buffer, INIT_BUFFER_SIZE); + if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { + perror("initialising buffer"); + return NULL; + } index -= INIT_BUFFER_SIZE; - write(fd, buffer, map_size - index); + if (write(fd, buffer, map_size - index) != map_size - index) { + perror("initialising buffer"); + return NULL; + } return __mte_allocate_memory_range(size, mem_type, mapping, range_before, range_after, true, fd); } @@ -271,29 +284,20 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) en |= (incl_mask << PR_MTE_TAG_SHIFT); /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ - if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) { + if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) { ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n"); return -EINVAL; } return 0; } -#define ID_AA64PFR1_MTE_SHIFT 8 -#define ID_AA64PFR1_MTE 2 - int mte_default_setup(void) { - unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long hwcaps2 = getauxval(AT_HWCAP2); unsigned long en = 0; int ret; - if (!(hwcaps & HWCAP_CPUID)) { - ksft_print_msg("FAIL: CPUID registers unavailable\n"); - return KSFT_FAIL; - } - /* Read ID_AA64PFR1_EL1 register */ - asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory"); - if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) { + if (!(hwcaps2 & HWCAP2_MTE)) { ksft_print_msg("FAIL: MTE features unavailable\n"); return KSFT_SKIP; } @@ -333,6 +337,7 @@ int create_temp_file(void) /* Create a file in the tmpfs filesystem */ fd = mkstemp(&filename[0]); if (fd == -1) { + perror(filename); ksft_print_msg("FAIL: Unable to open temporary file\n"); return 0; } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index c0c48fdb9ac1..4866f6a21901 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only +bpf-helpers* +bpf-syscall* test_verifier test_maps test_lru_map diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 044bfdcf5b74..511259c2c6c5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -include ../../../../scripts/Kbuild.include +include ../../../build/Build.include include ../../../scripts/Makefile.arch include ../../../scripts/Makefile.include @@ -21,13 +21,18 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= -CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \ +CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread +# Silence some warnings when compiled with clang +ifneq ($(LLVM),) +CFLAGS += -Wno-unused-command-line-argument +endif + # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_verifier_log test_dev_cgroup \ @@ -68,6 +73,7 @@ TEST_PROGS := test_kmod.sh \ test_bpftool_build.sh \ test_bpftool.sh \ test_bpftool_metadata.sh \ + test_doc_build.sh \ test_xsk.sh TEST_PROGS_EXTENDED := with_addr.sh \ @@ -103,6 +109,7 @@ override define CLEAN $(call msg,CLEAN) $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) $(Q)$(MAKE) -C bpf_testmod clean + $(Q)$(MAKE) docs-clean endef include ../lib.mk @@ -198,18 +205,27 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ CC=$(HOSTCC) LD=$(HOSTLD) \ + EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install - $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation - $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ - -C $(BPFTOOLDIR)/Documentation \ - OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \ - prefix= DESTDIR=$(SCRATCH_DIR)/ install + +all: docs + +docs: + $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ + -f Makefile.docs \ + prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@ + +docs-clean: + $(Q)$(MAKE) $(submake_extras) \ + -f Makefile.docs \ + prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) @@ -217,11 +233,12 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ + EXTRA_CFLAGS='-g -O0' \ + OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers endif -$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) +$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ @@ -292,6 +309,16 @@ endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c +LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ + linked_vars.skel.h linked_maps.skel.h + +test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o +linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o +linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o +linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o + +LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) + # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. # Parameters: @@ -310,8 +337,9 @@ TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)) TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)) TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ - $$(filter-out $(SKEL_BLACKLIST), \ + $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\ $$(TRUNNER_BPF_SRCS))) +TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) # Evaluate rules now with extra TRUNNER_XXX variables above already defined @@ -344,11 +372,22 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS)) -$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ - $(TRUNNER_OUTPUT)/%.o \ - | $(BPFTOOL) $(TRUNNER_OUTPUT) +$(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $(Q)$$(BPFTOOL) gen skeleton $$< > $$@ + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$< + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) + $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) + $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ + +$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) + $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) + $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps)) + $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o) + $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o) + $(Q)diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o) + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) + $(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@ endif # ensure we set up tests.h header generation rule just once @@ -370,6 +409,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_BPF_OBJS) \ $(TRUNNER_BPF_SKELS) \ + $(TRUNNER_BPF_SKELS_LINKED) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) @@ -382,11 +422,12 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ -# only copy extra resources if in flavored build +# non-flavored in-srctree builds receive special treatment, in particular, we +# do not need to copy extra resources (see e.g. test_btf_dump_case()) $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) -ifneq ($2,) +ifneq ($2:$(OUTPUT),:$(shell pwd)) $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) - $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/ + $(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ @@ -452,7 +493,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h @@ -476,3 +517,5 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature \ $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko) + +.PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs new file mode 100644 index 000000000000..ccf260021e83 --- /dev/null +++ b/tools/testing/selftests/bpf/Makefile.docs @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: GPL-2.0-only + +include ../../../scripts/Makefile.include +include ../../../scripts/utilities.mak + +INSTALL ?= install +RM ?= rm -f +RMDIR ?= rmdir --ignore-fail-on-non-empty + +ifeq ($(V),1) + Q = +else + Q = @ +endif + +prefix ?= /usr/local +mandir ?= $(prefix)/man +man2dir = $(mandir)/man2 +man7dir = $(mandir)/man7 + +SYSCALL_RST = bpf-syscall.rst +MAN2_RST = $(SYSCALL_RST) + +HELPERS_RST = bpf-helpers.rst +MAN7_RST = $(HELPERS_RST) + +_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST)) +DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2)) + +_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST)) +DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7)) + +DOCTARGETS := helpers syscall + +docs: $(DOCTARGETS) +syscall: man2 +helpers: man7 +man2: $(DOC_MAN2) +man7: $(DOC_MAN7) + +RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) + +# Configure make rules for the man page bpf-$1.$2. +# $1 - target for scripts/bpf_doc.py +# $2 - man page section to generate the troff file +define DOCS_RULES = +$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h + $$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \ + --filename $$< > $$@ + +$(OUTPUT)%.$2: $(OUTPUT)%.rst +ifndef RST2MAN_DEP + $$(error "rst2man not found, but required to generate man pages") +endif + $$(QUIET_GEN)rst2man $$< > $$@ + +docs-clean-$1: + $$(call QUIET_CLEAN, eBPF_$1-manpage) + $(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst + +docs-install-$1: docs + $$(call QUIET_INSTALL, eBPF_$1-manpage) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir) + $(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir) + +docs-uninstall-$1: + $$(call QUIET_UNINST, eBPF_$1-manpage) + $(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2)) + $(Q)$(RMDIR) $(DESTDIR)$$(man$2dir) + +.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1 +endef + +# Create the make targets to generate manual pages by name and section +$(eval $(call DOCS_RULES,helpers,7)) +$(eval $(call DOCS_RULES,syscall,2)) + +docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget)) +docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget)) +docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget)) + +.PHONY: docs docs-clean docs-install docs-uninstall man2 man7 diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index fd148b8410fa..3353778c30f8 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk. __ https://reviews.llvm.org/D78466 +bpf_verif_scale/loop6.o test failure with Clang 12 +================================================== + +With Clang 12, the following bpf_verif_scale test failed: + * ``bpf_verif_scale/loop6.o`` + +The verifier output looks like + +.. code-block:: c + + R1 type=ctx expected=fp + The sequence of 8193 jumps is too complex. + +The reason is compiler generating the following code + +.. code-block:: c + + ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) { + 14: 16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6> + 15: bc 51 00 00 00 00 00 00 w1 = w5 + 16: 04 01 00 00 ff ff ff ff w1 += -1 + 17: 67 05 00 00 20 00 00 00 r5 <<= 32 + 18: 77 05 00 00 20 00 00 00 r5 >>= 32 + 19: a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4> + 20: b7 05 00 00 06 00 00 00 r5 = 6 + 00000000000000a8 <LBB0_4>: + 21: b7 02 00 00 00 00 00 00 r2 = 0 + 22: b7 01 00 00 00 00 00 00 r1 = 0 + ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) { + 23: 7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1 + 24: 7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5 + +Note that insn #15 has w1 = w5 and w1 is refined later but +r5(w5) is eventually saved on stack at insn #24 for later use. +This cause later verifier failure. The bug has been `fixed`__ in +Clang 13. + +__ https://reviews.llvm.org/D97479 + BPF CO-RE-based tests and Clang version ======================================= @@ -131,3 +170,35 @@ failures: .. _2: https://reviews.llvm.org/D85174 .. _3: https://reviews.llvm.org/D83878 .. _4: https://reviews.llvm.org/D83242 + +Floating-point tests and Clang version +====================================== + +Certain selftests, e.g. core_reloc, require support for the floating-point +types, which was introduced in `Clang 13`__. The older Clang versions will +either crash when compiling these tests, or generate an incorrect BTF. + +__ https://reviews.llvm.org/D83289 + +Kernel function call test and Clang version +=========================================== + +Some selftests (e.g. kfunc_call and bpf_tcp_ca) require a LLVM support +to generate extern function in BTF. It was introduced in `Clang 13`__. + +Without it, the error from compiling bpf selftests looks like: + +.. code-block:: console + + libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2 + +__ https://reviews.llvm.org/D93563 + +Clang dependencies for static linking tests +=========================================== + +linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to +generate valid BTF information for weak variables. Please make sure you use +Clang that contains the fix. + +__ https://reviews.llvm.org/D100362 diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 91f0fac632f4..029589c008c9 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -187,16 +187,6 @@ struct tcp_congestion_ops { typeof(y) __y = (y); \ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) -static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) -{ - __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); - - acked -= cwnd - tp->snd_cwnd; - tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); - - return acked; -} - static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) { return tp->snd_cwnd < tp->snd_ssthresh; @@ -213,22 +203,7 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); } -static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) -{ - /* If credits accumulated at a higher w, apply them gently now. */ - if (tp->snd_cwnd_cnt >= w) { - tp->snd_cwnd_cnt = 0; - tp->snd_cwnd++; - } - - tp->snd_cwnd_cnt += acked; - if (tp->snd_cwnd_cnt >= w) { - __u32 delta = tp->snd_cwnd_cnt / w; - - tp->snd_cwnd_cnt -= delta * w; - tp->snd_cwnd += delta; - } - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); -} +extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; +extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; #endif diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index 48f90490f922..b692e6ead9b5 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -23,6 +23,7 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", + [BTF_KIND_FLOAT] = "FLOAT", }; static const char *btf_kind_str(__u16 kind) @@ -173,6 +174,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) } break; } + case BTF_KIND_FLOAT: + fprintf(out, " size=%u", t->size); + break; default: break; } diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 37e1f303fc11..5192305159ec 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -44,3 +44,5 @@ CONFIG_SECURITYFS=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_READ_POLICY=y CONFIG_BLK_DEV_LOOP=y +CONFIG_FUNCTION_TRACER=y +CONFIG_DYNAMIC_FTRACE=y diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index b8d6aef99db4..99628e1a1e58 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -57,6 +57,10 @@ int main(int argc, char **argv) __u32 key = 0, pid; int exit_code = 1; char buf[256]; + const struct timespec req = { + .tv_sec = 1, + .tv_nsec = 0, + }; cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) @@ -115,7 +119,7 @@ int main(int argc, char **argv) goto close_pmu; /* trigger some syscalls */ - sleep(1); + syscall(__NR_nanosleep, &req, NULL); err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid); if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno)) diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c index f0a64d8ac59a..f4d870da7684 100644 --- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c @@ -9,10 +9,13 @@ #include <test_maps.h> +static int nr_cpus; + static void map_batch_update(int map_fd, __u32 max_entries, int *keys, - int *values) + __s64 *values, bool is_pcpu) { - int i, err; + int i, j, err; + int cpu_offset = 0; DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, .elem_flags = 0, .flags = 0, @@ -20,22 +23,41 @@ static void map_batch_update(int map_fd, __u32 max_entries, int *keys, for (i = 0; i < max_entries; i++) { keys[i] = i; - values[i] = i + 1; + if (is_pcpu) { + cpu_offset = i * nr_cpus; + for (j = 0; j < nr_cpus; j++) + (values + cpu_offset)[j] = i + 1 + j; + } else { + values[i] = i + 1; + } } err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); } -static void map_batch_verify(int *visited, __u32 max_entries, - int *keys, int *values) +static void map_batch_verify(int *visited, __u32 max_entries, int *keys, + __s64 *values, bool is_pcpu) { - int i; + int i, j; + int cpu_offset = 0; memset(visited, 0, max_entries * sizeof(*visited)); for (i = 0; i < max_entries; i++) { - CHECK(keys[i] + 1 != values[i], "key/value checking", - "error: i %d key %d value %d\n", i, keys[i], values[i]); + if (is_pcpu) { + cpu_offset = i * nr_cpus; + for (j = 0; j < nr_cpus; j++) { + __s64 value = (values + cpu_offset)[j]; + CHECK(keys[i] + j + 1 != value, + "key/value checking", + "error: i %d j %d key %d value %lld\n", i, + j, keys[i], value); + } + } else { + CHECK(keys[i] + 1 != values[i], "key/value checking", + "error: i %d key %d value %lld\n", i, keys[i], + values[i]); + } visited[i] = 1; } for (i = 0; i < max_entries; i++) { @@ -44,20 +66,21 @@ static void map_batch_verify(int *visited, __u32 max_entries, } } -void test_array_map_batch_ops(void) +static void __test_map_lookup_and_update_batch(bool is_pcpu) { struct bpf_create_map_attr xattr = { .name = "array_map", - .map_type = BPF_MAP_TYPE_ARRAY, + .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : + BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), - .value_size = sizeof(int), + .value_size = sizeof(__s64), }; - int map_fd, *keys, *values, *visited; + int map_fd, *keys, *visited; __u32 count, total, total_success; const __u32 max_entries = 10; - bool nospace_err; __u64 batch = 0; - int err, step; + int err, step, value_size; + void *values; DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, .elem_flags = 0, .flags = 0, @@ -68,35 +91,35 @@ void test_array_map_batch_ops(void) CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); - keys = malloc(max_entries * sizeof(int)); - values = malloc(max_entries * sizeof(int)); - visited = malloc(max_entries * sizeof(int)); + value_size = sizeof(__s64); + if (is_pcpu) + value_size *= nr_cpus; + + keys = calloc(max_entries, sizeof(*keys)); + values = calloc(max_entries, value_size); + visited = calloc(max_entries, sizeof(*visited)); CHECK(!keys || !values || !visited, "malloc()", "error:%s\n", strerror(errno)); - /* populate elements to the map */ - map_batch_update(map_fd, max_entries, keys, values); - /* test 1: lookup in a loop with various steps. */ total_success = 0; for (step = 1; step < max_entries; step++) { - map_batch_update(map_fd, max_entries, keys, values); - map_batch_verify(visited, max_entries, keys, values); + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); + map_batch_verify(visited, max_entries, keys, values, is_pcpu); memset(keys, 0, max_entries * sizeof(*keys)); - memset(values, 0, max_entries * sizeof(*values)); + memset(values, 0, max_entries * value_size); batch = 0; total = 0; /* iteratively lookup/delete elements with 'step' * elements each. */ count = step; - nospace_err = false; while (true) { err = bpf_map_lookup_batch(map_fd, - total ? &batch : NULL, &batch, - keys + total, - values + total, - &count, &opts); + total ? &batch : NULL, + &batch, keys + total, + values + total * value_size, + &count, &opts); CHECK((err && errno != ENOENT), "lookup with steps", "error: %s\n", strerror(errno)); @@ -107,13 +130,10 @@ void test_array_map_batch_ops(void) } - if (nospace_err == true) - continue; - CHECK(total != max_entries, "lookup with steps", "total = %u, max_entries = %u\n", total, max_entries); - map_batch_verify(visited, max_entries, keys, values); + map_batch_verify(visited, max_entries, keys, values, is_pcpu); total_success++; } @@ -121,9 +141,30 @@ void test_array_map_batch_ops(void) CHECK(total_success == 0, "check total_success", "unexpected failure\n"); - printf("%s:PASS\n", __func__); - free(keys); free(values); free(visited); } + +static void array_map_batch_ops(void) +{ + __test_map_lookup_and_update_batch(false); + printf("test_%s:PASS\n", __func__); +} + +static void array_percpu_map_batch_ops(void) +{ + __test_map_lookup_and_update_batch(true); + printf("test_%s:PASS\n", __func__); +} + +void test_array_map_batch_ops(void) +{ + nr_cpus = libbpf_num_possible_cpus(); + + CHECK(nr_cpus < 0, "nr_cpus checking", + "error: get possible cpus failed"); + + array_map_batch_ops(); + array_percpu_map_batch_ops(); +} diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c new file mode 100644 index 000000000000..2e986e5e4cac --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <arpa/inet.h> +#include <linux/bpf.h> +#include <netinet/in.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <test_maps.h> + +struct test_lpm_key { + __u32 prefix; + struct in_addr ipv4; +}; + +static void map_batch_update(int map_fd, __u32 max_entries, + struct test_lpm_key *keys, int *values) +{ + __u32 i; + int err; + char buff[16] = { 0 }; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + for (i = 0; i < max_entries; i++) { + keys[i].prefix = 32; + snprintf(buff, 16, "192.168.1.%d", i + 1); + inet_pton(AF_INET, buff, &keys[i].ipv4); + values[i] = i + 1; + } + + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); +} + +static void map_batch_verify(int *visited, __u32 max_entries, + struct test_lpm_key *keys, int *values) +{ + char buff[16] = { 0 }; + int lower_byte = 0; + __u32 i; + + memset(visited, 0, max_entries * sizeof(*visited)); + for (i = 0; i < max_entries; i++) { + inet_ntop(AF_INET, &keys[i].ipv4, buff, 32); + CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF, + "sscanf()", "error: i %d\n", i); + CHECK(lower_byte != values[i], "key/value checking", + "error: i %d key %s value %d\n", i, buff, values[i]); + visited[i] = 1; + } + for (i = 0; i < max_entries; i++) { + CHECK(visited[i] != 1, "visited checking", + "error: keys array at index %d missing\n", i); + } +} + +void test_lpm_trie_map_batch_ops(void) +{ + struct bpf_create_map_attr xattr = { + .name = "lpm_trie_map", + .map_type = BPF_MAP_TYPE_LPM_TRIE, + .key_size = sizeof(struct test_lpm_key), + .value_size = sizeof(int), + .map_flags = BPF_F_NO_PREALLOC, + }; + struct test_lpm_key *keys, key; + int map_fd, *values, *visited; + __u32 step, count, total, total_success; + const __u32 max_entries = 10; + __u64 batch = 0; + int err; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + xattr.max_entries = max_entries; + map_fd = bpf_create_map_xattr(&xattr); + CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n", + strerror(errno)); + + keys = malloc(max_entries * sizeof(struct test_lpm_key)); + values = malloc(max_entries * sizeof(int)); + visited = malloc(max_entries * sizeof(int)); + CHECK(!keys || !values || !visited, "malloc()", "error:%s\n", + strerror(errno)); + + total_success = 0; + for (step = 1; step < max_entries; step++) { + map_batch_update(map_fd, max_entries, keys, values); + map_batch_verify(visited, max_entries, keys, values); + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * sizeof(*values)); + batch = 0; + total = 0; + /* iteratively lookup/delete elements with 'step' + * elements each. + */ + count = step; + while (true) { + err = bpf_map_lookup_batch(map_fd, + total ? &batch : NULL, &batch, + keys + total, values + total, &count, &opts); + + CHECK((err && errno != ENOENT), "lookup with steps", + "error: %s\n", strerror(errno)); + + total += count; + if (err) + break; + } + + CHECK(total != max_entries, "lookup with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + map_batch_verify(visited, max_entries, keys, values); + + total = 0; + count = step; + while (total < max_entries) { + if (max_entries - total < step) + count = max_entries - total; + err = bpf_map_delete_batch(map_fd, keys + total, &count, + &opts); + CHECK((err && errno != ENOENT), "delete batch", + "error: %s\n", strerror(errno)); + total += count; + if (err) + break; + } + CHECK(total != max_entries, "delete with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + /* check map is empty, errono == ENOENT */ + err = bpf_map_get_next_key(map_fd, NULL, &key); + CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()", + "error: %s\n", strerror(errno)); + + total_success++; + } + + CHECK(total_success == 0, "check total_success", + "unexpected failure\n"); + + printf("%s:PASS\n", __func__); + + free(keys); + free(values); + free(visited); +} diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index a0ee87c8e1ea..9dc4e3dfbcf3 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -2,6 +2,44 @@ #include <test_progs.h> #include "test_attach_probe.skel.h" +#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2 + +#define OP_RT_RA_MASK 0xffff0000UL +#define LIS_R2 0x3c400000UL +#define ADDIS_R2_R12 0x3c4c0000UL +#define ADDI_R2_R2 0x38420000UL + +static ssize_t get_offset(ssize_t addr, ssize_t base) +{ + u32 *insn = (u32 *) addr; + + /* + * A PPC64 ABIv2 function may have a local and a global entry + * point. We need to use the local entry point when patching + * functions, so identify and step over the global entry point + * sequence. + * + * The global entry point sequence is always of the form: + * + * addis r2,r12,XXXX + * addi r2,r2,XXXX + * + * A linker optimisation may convert the addis to lis: + * + * lis r2,XXXX + * addi r2,r2,XXXX + */ + if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || + ((*insn & OP_RT_RA_MASK) == LIS_R2)) && + ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2)) + return (ssize_t)(insn + 2) - base; + else + return addr - base; +} +#else +#define get_offset(addr, base) (addr - base) +#endif + ssize_t get_base_addr() { size_t start, offset; char buf[256]; @@ -36,7 +74,7 @@ void test_attach_probe(void) if (CHECK(base_addr < 0, "get_base_addr", "failed to find base addr: %zd", base_addr)) return; - uprobe_offset = (size_t)&get_base_addr - base_addr; + uprobe_offset = get_offset((size_t)&get_base_addr, base_addr); skel = test_attach_probe__open_and_load(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 74c45d557a2b..2d3590cfb5e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -147,6 +147,7 @@ static void test_task_stack(void) return; do_dummy_read(skel->progs.dump_task_stack); + do_dummy_read(skel->progs.get_task_user_stacks); bpf_iter_task_stack__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 37c5494a0381..e25917f04602 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -6,6 +6,7 @@ #include <test_progs.h> #include "bpf_dctcp.skel.h" #include "bpf_cubic.skel.h" +#include "bpf_tcp_nogpl.skel.h" #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -227,10 +228,53 @@ static void test_dctcp(void) bpf_dctcp__destroy(dctcp_skel); } +static char *err_str; +static bool found; + +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) +{ + char *log_buf; + + if (level != LIBBPF_WARN || + strcmp(format, "libbpf: \n%s\n")) { + vprintf(format, args); + return 0; + } + + log_buf = va_arg(args, char *); + if (!log_buf) + goto out; + if (err_str && strstr(log_buf, err_str) != NULL) + found = true; +out: + printf(format, log_buf); + return 0; +} + +static void test_invalid_license(void) +{ + libbpf_print_fn_t old_print_fn; + struct bpf_tcp_nogpl *skel; + + err_str = "struct ops programs must have a GPL compatible license"; + found = false; + old_print_fn = libbpf_set_print(libbpf_debug_print); + + skel = bpf_tcp_nogpl__open_and_load(); + ASSERT_NULL(skel, "bpf_tcp_nogpl"); + ASSERT_EQ(found, true, "expected_err_msg"); + + bpf_tcp_nogpl__destroy(skel); + libbpf_set_print(old_print_fn); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) test_dctcp(); if (test__start_subtest("cubic")) test_cubic(); + if (test__start_subtest("invalid_license")) + test_invalid_license(); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index e698ee6bb6c2..3d002c245d2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -76,6 +76,7 @@ void test_bpf_verif_scale(void) { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, { "loop4.o", BPF_PROG_TYPE_SCHED_CLS }, { "loop5.o", BPF_PROG_TYPE_SCHED_CLS }, + { "loop6.o", BPF_PROG_TYPE_KPROBE }, /* partial unroll. 19k insn in a loop. * Total program size 20.8k insn. diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 6a7ee7420701..0457ae32b270 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -1903,7 +1903,7 @@ static struct btf_raw_test raw_tests[] = { .raw_types = { /* int */ /* [1] */ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), - BTF_TYPE_ENC(0, 0x10000000, 4), + BTF_TYPE_ENC(0, 0x20000000, 4), BTF_END_RAW, }, .str_sec = "", @@ -3531,6 +3531,136 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, +{ + .descr = "float test #1, well-formed", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* [1] */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [2] */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [3] */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 8), /* [4] */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 12), /* [5] */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 16), /* [6] */ + BTF_STRUCT_ENC(NAME_TBD, 5, 48), /* [7] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_MEMBER_ENC(NAME_TBD, 3, 32), + BTF_MEMBER_ENC(NAME_TBD, 4, 64), + BTF_MEMBER_ENC(NAME_TBD, 5, 128), + BTF_MEMBER_ENC(NAME_TBD, 6, 256), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double" + "\0floats\0a\0b\0c\0d\0e"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "float_type_check_btf", + .key_size = sizeof(int), + .value_size = 48, + .key_type_id = 1, + .value_type_id = 7, + .max_entries = 1, +}, +{ + .descr = "float test #2, invalid vlen", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4), + /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0float"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "float_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "vlen != 0", +}, +{ + .descr = "float test #3, invalid kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4), + /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0float"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "float_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, +{ + .descr = "float test #4, member does not fit", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* [1] */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */ + BTF_STRUCT_ENC(NAME_TBD, 1, 2), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0float\0floats\0x"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "float_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, +{ + .descr = "float test #5, member is not properly aligned", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* [1] */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */ + BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0float\0floats\0x"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "float_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Member is not properly aligned", +}, +{ + .descr = "float test #6, invalid size", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* [1] */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 6), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0float"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "float_type_check_btf", + .key_size = sizeof(int), + .value_size = 6, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid type_size", +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) @@ -6281,11 +6411,12 @@ const struct btf_dedup_test dedup_tests[] = { /* int[16] */ BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ /* struct s { */ - BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */ + BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [3] */ BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */ BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */ BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */ BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */ + BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), /* float d; */ /* ptr -> [3] struct s */ BTF_PTR_ENC(3), /* [4] */ /* ptr -> [6] const int */ @@ -6296,39 +6427,43 @@ const struct btf_dedup_test dedup_tests[] = { /* full copy of the above */ BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */ BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */ - BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */ + BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [9] */ BTF_MEMBER_ENC(NAME_NTH(3), 10, 0), BTF_MEMBER_ENC(NAME_NTH(4), 11, 64), BTF_MEMBER_ENC(NAME_NTH(5), 8, 128), BTF_MEMBER_ENC(NAME_NTH(6), 7, 640), + BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), BTF_PTR_ENC(9), /* [10] */ BTF_PTR_ENC(12), /* [11] */ BTF_CONST_ENC(7), /* [12] */ + BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [13] */ BTF_END_RAW, }, - BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"), + BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"), }, .expect = { .raw_types = { /* int */ - BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ /* int[16] */ BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ /* struct s { */ - BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */ - BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */ + BTF_STRUCT_ENC(NAME_NTH(8), 5, 88), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(7), 4, 0), /* struct s *next; */ BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */ BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */ BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */ + BTF_MEMBER_ENC(NAME_NTH(4), 7, 672), /* float d; */ /* ptr -> [3] struct s */ BTF_PTR_ENC(3), /* [4] */ /* ptr -> [6] const int */ BTF_PTR_ENC(6), /* [5] */ /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ + BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [7] */ BTF_END_RAW, }, - BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"), + BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"), }, .opts = { .dont_resolve_fwds = false, @@ -6449,9 +6584,10 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"), }, .expect = { .raw_types = { @@ -6474,16 +6610,17 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"), }, .opts = { .dont_resolve_fwds = false, }, }, { - .descr = "dedup: no int duplicates", + .descr = "dedup: no int/float duplicates", .input = { .raw_types = { BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), @@ -6498,9 +6635,15 @@ const struct btf_dedup_test dedup_tests[] = { BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), /* different byte size */ BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + /* all allowed sizes */ + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2), + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4), + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8), + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12), + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16), BTF_END_RAW, }, - BTF_STR_SEC("\0int\0some other int"), + BTF_STR_SEC("\0int\0some other int\0float"), }, .expect = { .raw_types = { @@ -6516,9 +6659,15 @@ const struct btf_dedup_test dedup_tests[] = { BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), /* different byte size */ BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + /* all allowed sizes */ + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2), + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4), + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8), + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12), + BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16), BTF_END_RAW, }, - BTF_STR_SEC("\0int\0some other int"), + BTF_STR_SEC("\0int\0some other int\0float"), }, .opts = { .dont_resolve_fwds = false, @@ -6630,6 +6779,7 @@ static int btf_type_size(const struct btf_type *t) case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + case BTF_KIND_FLOAT: return base_size; case BTF_KIND_INT: return base_size + sizeof(__u32); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index c60091ee8a21..5e129dc2073c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -77,7 +77,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file); fd = mkstemp(out_file); - if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) { + if (!ASSERT_GE(fd, 0, "create_tmp")) { err = fd; goto done; } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c index 8c52d72c876e..8ab5d3e358dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -6,8 +6,6 @@ #include <test_progs.h> #include <bpf/btf.h> -static int duration = 0; - void test_btf_endian() { #if __BYTE_ORDER == __LITTLE_ENDIAN enum btf_endianness endian = BTF_LITTLE_ENDIAN; @@ -71,7 +69,7 @@ void test_btf_endian() { /* now modify original BTF */ var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1); - CHECK(var_id <= 0, "var_id", "failed %d\n", var_id); + ASSERT_GT(var_id, 0, "var_id"); btf__free(swap_btf); swap_btf = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 4d9b514b3fd9..736796e56ed1 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -54,7 +54,7 @@ void test_cgroup_link(void) for (i = 0; i < cg_nr; i++) { cgs[i].fd = create_and_get_cgroup(cgs[i].path); - if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd)) + if (!ASSERT_GE(cgs[i].fd, 0, "cg_create")) goto cleanup; } diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 36af1c138faf..b62a39315336 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -128,6 +128,8 @@ static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex) test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu); test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu); test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu); + test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len, mtu); + test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len_exceed, mtu); cleanup: test_check_mtu__destroy(skel); @@ -187,6 +189,8 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu); test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu); test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu); cleanup: test_check_mtu__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 06eb956ff7bb..607710826dca 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -210,11 +210,6 @@ static int duration = 0; .bpf_obj_file = "test_core_reloc_existence.o", \ .btf_src_file = "btf__core_reloc_" #name ".o" \ -#define FIELD_EXISTS_ERR_CASE(name) { \ - FIELD_EXISTS_CASE_COMMON(name), \ - .fails = true, \ -} - #define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \ .case_name = test_name_prefix#name, \ .bpf_obj_file = objfile, \ @@ -222,7 +217,7 @@ static int duration = 0; #define BITFIELDS_CASE(name, ...) { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ - "direct:", name), \ + "probed:", name), \ .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \ .input_len = sizeof(struct core_reloc_##name), \ .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ @@ -230,7 +225,7 @@ static int duration = 0; .output_len = sizeof(struct core_reloc_bitfields_output), \ }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ - "probed:", name), \ + "direct:", name), \ .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \ .input_len = sizeof(struct core_reloc_##name), \ .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ @@ -266,6 +261,7 @@ static int duration = 0; .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \ .ptr_sz = 8, /* always 8-byte pointer for BPF */ \ .enum_sz = sizeof(((type *)0)->enum_field), \ + .float_sz = sizeof(((type *)0)->float_field), \ } #define SIZE_CASE(name) { \ @@ -550,8 +546,7 @@ static struct core_reloc_test_case test_cases[] = { ARRAYS_ERR_CASE(arrays___err_too_small), ARRAYS_ERR_CASE(arrays___err_too_shallow), ARRAYS_ERR_CASE(arrays___err_non_array), - ARRAYS_ERR_CASE(arrays___err_wrong_val_type1), - ARRAYS_ERR_CASE(arrays___err_wrong_val_type2), + ARRAYS_ERR_CASE(arrays___err_wrong_val_type), ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr), /* enum/ptr/int handling scenarios */ @@ -642,13 +637,25 @@ static struct core_reloc_test_case test_cases[] = { }, .output_len = sizeof(struct core_reloc_existence_output), }, - - FIELD_EXISTS_ERR_CASE(existence__err_int_sz), - FIELD_EXISTS_ERR_CASE(existence__err_int_type), - FIELD_EXISTS_ERR_CASE(existence__err_int_kind), - FIELD_EXISTS_ERR_CASE(existence__err_arr_kind), - FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type), - FIELD_EXISTS_ERR_CASE(existence__err_struct_type), + { + FIELD_EXISTS_CASE_COMMON(existence___wrong_field_defs), + .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___wrong_field_defs) { + }, + .input_len = sizeof(struct core_reloc_existence___wrong_field_defs), + .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) { + .a_exists = 0, + .b_exists = 0, + .c_exists = 0, + .arr_exists = 0, + .s_exists = 0, + .a_value = 0xff000001u, + .b_value = 0xff000002u, + .c_value = 0xff000003u, + .arr_value = 0xff000004u, + .s_value = 0xff000005u, + }, + .output_len = sizeof(struct core_reloc_existence_output), + }, /* bitfield relocation checks */ BITFIELDS_CASE(bitfields, { @@ -857,13 +864,20 @@ void test_core_reloc(void) "prog '%s' not found\n", probe_name)) goto cleanup; + + if (test_case->btf_src_file) { + err = access(test_case->btf_src_file, R_OK); + if (!ASSERT_OK(err, "btf_src_file")) + goto cleanup; + } + load_attr.obj = obj; load_attr.log_level = 0; load_attr.target_btf_path = test_case->btf_src_file; err = bpf_object__load_xattr(&load_attr); if (err) { if (!test_case->fails) - CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err); + ASSERT_OK(err, "obj_load"); goto cleanup; } @@ -902,10 +916,8 @@ void test_core_reloc(void) goto cleanup; } - if (test_case->fails) { - CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name); + if (!ASSERT_FALSE(test_case->fails, "obj_load_should_fail")) goto cleanup; - } equal = memcmp(data->out, test_case->output, test_case->output_len) == 0; diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 04ebbf1cb390..7cb111b11995 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -3,35 +3,57 @@ #include <test_progs.h> #include "fentry_test.skel.h" -void test_fentry_test(void) +static int fentry_test(struct fentry_test *fentry_skel) { - struct fentry_test *fentry_skel = NULL; int err, prog_fd, i; __u32 duration = 0, retval; + struct bpf_link *link; __u64 *result; - fentry_skel = fentry_test__open_and_load(); - if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) - goto cleanup; - err = fentry_test__attach(fentry_skel); - if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) - goto cleanup; + if (!ASSERT_OK(err, "fentry_attach")) + return err; + + /* Check that already linked program can't be attached again. */ + link = bpf_program__attach(fentry_skel->progs.test1); + if (!ASSERT_ERR_PTR(link, "fentry_attach_link")) + return -1; prog_fd = bpf_program__fd(fentry_skel->progs.test1); err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "test_run", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); result = (__u64 *)fentry_skel->bss; - for (i = 0; i < 6; i++) { - if (CHECK(result[i] != 1, "result", - "fentry_test%d failed err %lld\n", i + 1, result[i])) - goto cleanup; + for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) { + if (!ASSERT_EQ(result[i], 1, "fentry_result")) + return -1; } + fentry_test__detach(fentry_skel); + + /* zero results for re-attach test */ + memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss)); + return 0; +} + +void test_fentry_test(void) +{ + struct fentry_test *fentry_skel = NULL; + int err; + + fentry_skel = fentry_test__open_and_load(); + if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) + goto cleanup; + + err = fentry_test(fentry_skel); + if (!ASSERT_OK(err, "fentry_first_attach")) + goto cleanup; + + err = fentry_test(fentry_skel); + ASSERT_OK(err, "fentry_second_attach"); + cleanup: fentry_test__destroy(fentry_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 5c0448910426..63990842d20f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -58,42 +58,73 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, test_cb cb) { struct bpf_object *obj = NULL, *tgt_obj; + __u32 retval, tgt_prog_id, info_len; + struct bpf_prog_info prog_info = {}; struct bpf_program **prog = NULL; struct bpf_link **link = NULL; - __u32 duration = 0, retval; int err, tgt_fd, i; + struct btf *btf; err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); - if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", - target_obj_file, err, errno)) + if (!ASSERT_OK(err, "tgt_prog_load")) return; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .attach_prog_fd = tgt_fd, ); + info_len = sizeof(prog_info); + err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len); + if (!ASSERT_OK(err, "tgt_fd_get_info")) + goto close_prog; + + tgt_prog_id = prog_info.id; + btf = bpf_object__btf(tgt_obj); + link = calloc(sizeof(struct bpf_link *), prog_cnt); + if (!ASSERT_OK_PTR(link, "link_ptr")) + goto close_prog; + prog = calloc(sizeof(struct bpf_program *), prog_cnt); - if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory")) + if (!ASSERT_OK_PTR(prog, "prog_ptr")) goto close_prog; obj = bpf_object__open_file(obj_file, &opts); - if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", - "failed to open %s: %ld\n", obj_file, - PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d\n", err)) + if (!ASSERT_OK(err, "obj_load")) goto close_prog; for (i = 0; i < prog_cnt; i++) { + struct bpf_link_info link_info; + char *tgt_name; + __s32 btf_id; + + tgt_name = strstr(prog_name[i], "/"); + if (!ASSERT_OK_PTR(tgt_name, "tgt_name")) + goto close_prog; + btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC); + prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i])) + if (!ASSERT_OK_PTR(prog[i], prog_name[i])) goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + if (!ASSERT_OK_PTR(link[i], "attach_trace")) goto close_prog; + + info_len = sizeof(link_info); + memset(&link_info, 0, sizeof(link_info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link[i]), + &link_info, &info_len); + ASSERT_OK(err, "link_fd_get_info"); + ASSERT_EQ(link_info.tracing.attach_type, + bpf_program__get_expected_attach_type(prog[i]), + "link_attach_type"); + ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id"); + ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id"); } if (cb) { @@ -106,10 +137,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, goto close_prog; err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), - NULL, NULL, &retval, &duration); - CHECK(err || retval, "ipv6", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + NULL, NULL, &retval, NULL); + ASSERT_OK(err, "prog_run"); + ASSERT_EQ(retval, 0, "prog_run_ret"); if (check_data_map(obj, prog_cnt, false)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c new file mode 100644 index 000000000000..ccc7e8a34ab6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include <time.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include "fexit_sleep.skel.h" + +static int do_sleep(void *skel) +{ + struct fexit_sleep *fexit_skel = skel; + struct timespec ts1 = { .tv_nsec = 1 }; + struct timespec ts2 = { .tv_sec = 10 }; + + fexit_skel->bss->pid = getpid(); + (void)syscall(__NR_nanosleep, &ts1, NULL); + (void)syscall(__NR_nanosleep, &ts2, NULL); + return 0; +} + +#define STACK_SIZE (1024 * 1024) +static char child_stack[STACK_SIZE]; + +void test_fexit_sleep(void) +{ + struct fexit_sleep *fexit_skel = NULL; + int wstatus, duration = 0; + pid_t cpid; + int err, fexit_cnt; + + fexit_skel = fexit_sleep__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + goto cleanup; + + err = fexit_sleep__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + goto cleanup; + + cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel); + if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno))) + goto cleanup; + + /* wait until first sys_nanosleep ends and second sys_nanosleep starts */ + while (READ_ONCE(fexit_skel->bss->fentry_cnt) != 2); + fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt); + if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt)) + goto cleanup; + + /* close progs and detach them. That will trigger two nop5->jmp5 rewrites + * in the trampolines to skip nanosleep_fexit prog. + * The nanosleep_fentry prog will get detached first. + * The nanosleep_fexit prog will get detached second. + * Detaching will trigger freeing of both progs JITed images. + * There will be two dying bpf_tramp_image-s, but only the initial + * bpf_tramp_image (with both _fentry and _fexit progs will be stuck + * waiting for percpu_ref_kill to confirm). The other one + * will be freed quickly. + */ + close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry)); + close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit)); + fexit_sleep__detach(fexit_skel); + + /* kill the thread to unwind sys_nanosleep stack through the trampoline */ + kill(cpid, 9); + + if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno))) + goto cleanup; + if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed")) + goto cleanup; + + /* The bypassed nanosleep_fexit prog shouldn't have executed. + * Unlike progs the maps were not freed and directly accessible. + */ + fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt); + if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt)) + goto cleanup; + +cleanup: + fexit_sleep__destroy(fexit_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index 78d7a2765c27..6792e41f7f69 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -3,35 +3,57 @@ #include <test_progs.h> #include "fexit_test.skel.h" -void test_fexit_test(void) +static int fexit_test(struct fexit_test *fexit_skel) { - struct fexit_test *fexit_skel = NULL; int err, prog_fd, i; __u32 duration = 0, retval; + struct bpf_link *link; __u64 *result; - fexit_skel = fexit_test__open_and_load(); - if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) - goto cleanup; - err = fexit_test__attach(fexit_skel); - if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) - goto cleanup; + if (!ASSERT_OK(err, "fexit_attach")) + return err; + + /* Check that already linked program can't be attached again. */ + link = bpf_program__attach(fexit_skel->progs.test1); + if (!ASSERT_ERR_PTR(link, "fexit_attach_link")) + return -1; prog_fd = bpf_program__fd(fexit_skel->progs.test1); err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - CHECK(err || retval, "test_run", - "err %d errno %d retval %d duration %d\n", - err, errno, retval, duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); result = (__u64 *)fexit_skel->bss; - for (i = 0; i < 6; i++) { - if (CHECK(result[i] != 1, "result", - "fexit_test%d failed err %lld\n", i + 1, result[i])) - goto cleanup; + for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) { + if (!ASSERT_EQ(result[i], 1, "fexit_result")) + return -1; } + fexit_test__detach(fexit_skel); + + /* zero results for re-attach test */ + memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss)); + return 0; +} + +void test_fexit_test(void) +{ + struct fexit_test *fexit_skel = NULL; + int err; + + fexit_skel = fexit_test__open_and_load(); + if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) + goto cleanup; + + err = fexit_test(fexit_skel); + if (!ASSERT_OK(err, "fexit_first_attach")) + goto cleanup; + + err = fexit_test(fexit_skel); + ASSERT_OK(err, "fexit_second_attach"); + cleanup: fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c new file mode 100644 index 000000000000..68eb12a287d4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include <network_helpers.h> +#include "for_each_hash_map_elem.skel.h" +#include "for_each_array_map_elem.skel.h" + +static unsigned int duration; + +static void test_hash_map(void) +{ + int i, err, hashmap_fd, max_entries, percpu_map_fd; + struct for_each_hash_map_elem *skel; + __u64 *percpu_valbuf = NULL; + __u32 key, num_cpus, retval; + __u64 val; + + skel = for_each_hash_map_elem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load")) + return; + + hashmap_fd = bpf_map__fd(skel->maps.hashmap); + max_entries = bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i + 1; + err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + num_cpus = bpf_num_possible_cpus(); + percpu_map_fd = bpf_map__fd(skel->maps.percpu_map); + percpu_valbuf = malloc(sizeof(__u64) * num_cpus); + if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf")) + goto out; + + key = 1; + for (i = 0; i < num_cpus; i++) + percpu_valbuf[i] = i + 1; + err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY); + if (!ASSERT_OK(err, "percpu_map_update")) + goto out; + + err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access), + 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, + &retval, &duration); + if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n", + err, errno, retval)) + goto out; + + ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output"); + ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems"); + + key = 1; + err = bpf_map_lookup_elem(hashmap_fd, &key, &val); + ASSERT_ERR(err, "hashmap_lookup"); + + ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called"); + ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus"); + ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems"); + ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key"); + ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val"); + ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output"); +out: + free(percpu_valbuf); + for_each_hash_map_elem__destroy(skel); +} + +static void test_array_map(void) +{ + __u32 key, num_cpus, max_entries, retval; + int i, arraymap_fd, percpu_map_fd, err; + struct for_each_array_map_elem *skel; + __u64 *percpu_valbuf = NULL; + __u64 val, expected_total; + + skel = for_each_array_map_elem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load")) + return; + + arraymap_fd = bpf_map__fd(skel->maps.arraymap); + expected_total = 0; + max_entries = bpf_map__max_entries(skel->maps.arraymap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i + 1; + /* skip the last iteration for expected total */ + if (i != max_entries - 1) + expected_total += val; + err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + num_cpus = bpf_num_possible_cpus(); + percpu_map_fd = bpf_map__fd(skel->maps.percpu_map); + percpu_valbuf = malloc(sizeof(__u64) * num_cpus); + if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf")) + goto out; + + key = 0; + for (i = 0; i < num_cpus; i++) + percpu_valbuf[i] = i + 1; + err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY); + if (!ASSERT_OK(err, "percpu_map_update")) + goto out; + + err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access), + 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, + &retval, &duration); + if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n", + err, errno, retval)) + goto out; + + ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output"); + ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val"); + +out: + free(percpu_valbuf); + for_each_array_map_elem__destroy(skel); +} + +void test_for_each(void) +{ + if (test__start_subtest("hash_map")) + test_hash_map(); + if (test__start_subtest("array_map")) + test_array_map(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index 42c3a3103c26..d65107919998 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -134,7 +134,7 @@ void test_kfree_skb(void) /* make sure kfree_skb program was triggered * and it sent expected skb into ring buffer */ - CHECK_FAIL(!passed); + ASSERT_TRUE(passed, "passed"); err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok); if (CHECK(err, "get_result", diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c new file mode 100644 index 000000000000..7fc0951ee75f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include <network_helpers.h> +#include "kfunc_call_test.skel.h" +#include "kfunc_call_test_subprog.skel.h" + +static void test_main(void) +{ + struct kfunc_call_test *skel; + int prog_fd, retval, err; + + skel = kfunc_call_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + return; + + prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1); + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + ASSERT_OK(err, "bpf_prog_test_run(test1)"); + ASSERT_EQ(retval, 12, "test1-retval"); + + prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2); + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + ASSERT_OK(err, "bpf_prog_test_run(test2)"); + ASSERT_EQ(retval, 3, "test2-retval"); + + kfunc_call_test__destroy(skel); +} + +static void test_subprog(void) +{ + struct kfunc_call_test_subprog *skel; + int prog_fd, retval, err; + + skel = kfunc_call_test_subprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + return; + + prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1); + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + ASSERT_OK(err, "bpf_prog_test_run(test1)"); + ASSERT_EQ(retval, 10, "test1-retval"); + ASSERT_NEQ(skel->data->active_res, -1, "active_res"); + ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state"); + + kfunc_call_test_subprog__destroy(skel); +} + +void test_kfunc_call(void) +{ + if (test__start_subtest("main")) + test_main(); + + if (test__start_subtest("subprog")) + test_subprog(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c new file mode 100644 index 000000000000..e9916f2817ec --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <sys/syscall.h> +#include "linked_funcs.skel.h" + +void test_linked_funcs(void) +{ + int err; + struct linked_funcs *skel; + + skel = linked_funcs__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->rodata->my_tid = syscall(SYS_gettid); + skel->bss->syscall_id = SYS_getpgid; + + err = linked_funcs__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + err = linked_funcs__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger */ + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1"); + ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1"); + ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1"); + + ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2"); + ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2"); + /* output_weak2 should never be updated */ + ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2"); + +cleanup: + linked_funcs__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c new file mode 100644 index 000000000000..85dcaaaf2775 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_maps.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <sys/syscall.h> +#include "linked_maps.skel.h" + +void test_linked_maps(void) +{ + int err; + struct linked_maps *skel; + + skel = linked_maps__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + err = linked_maps__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger */ + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1"); + ASSERT_EQ(skel->bss->output_second1, 2, "output_second1"); + ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1"); + +cleanup: + linked_maps__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c new file mode 100644 index 000000000000..267166abe4c1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/linked_vars.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <sys/syscall.h> +#include "linked_vars.skel.h" + +void test_linked_vars(void) +{ + int err; + struct linked_vars *skel; + + skel = linked_vars__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->input_bss1 = 1000; + skel->bss->input_bss2 = 2000; + skel->bss->input_bss_weak = 3000; + + err = linked_vars__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + err = linked_vars__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger */ + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1"); + ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2"); + /* 10 comes from "winner" input_data_weak in first obj file */ + ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_bss1"); + ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_bss2"); + /* 100 comes from "winner" input_rodata_weak in first obj file */ + ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_weak1"); + ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_weak2"); + +cleanup: + linked_vars__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c index c230a573c373..4972f92205c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -12,11 +12,22 @@ void test_map_ptr(void) __u32 duration = 0, retval; char buf[128]; int err; + int page_size = getpagesize(); - skel = map_ptr_kern__open_and_load(); - if (CHECK(!skel, "skel_open_load", "open_load failed\n")) + skel = map_ptr_kern__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) return; + err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto cleanup; + + err = map_ptr_kern__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + skel->bss->page_size = page_size; + err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4, sizeof(pkt_v4), buf, NULL, &retval, NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 9c3c5c0f068f..37b002ca1167 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -29,22 +29,36 @@ void test_mmap(void) struct test_mmap *skel; __u64 val = 0; - skel = test_mmap__open_and_load(); - if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) + skel = test_mmap__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + err = bpf_map__set_max_entries(skel->maps.rdonly_map, page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + /* at least 4 pages of data */ + err = bpf_map__set_max_entries(skel->maps.data_map, + 4 * (page_size / sizeof(u64))); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + err = test_mmap__load(skel); + if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) + goto cleanup; + bss_map = skel->maps.bss; data_map = skel->maps.data_map; data_map_fd = bpf_map__fd(data_map); rdmap_fd = bpf_map__fd(skel->maps.rdonly_map); - tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0); + tmp1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0); if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) { - munmap(tmp1, 4096); + munmap(tmp1, page_size); goto cleanup; } /* now double-check if it's mmap()'able at all */ - tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0); + tmp1 = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rdmap_fd, 0); if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 5bc53d53d86e..d85a69b7ce44 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -45,12 +45,18 @@ static int trigger_module_test_write(int write_sz) return 0; } +static int delete_module(const char *name, int flags) +{ + return syscall(__NR_delete_module, name, flags); +} + void test_module_attach(void) { const int READ_SZ = 456; const int WRITE_SZ = 457; struct test_module_attach* skel; struct test_module_attach__bss *bss; + struct bpf_link *link; int err; skel = test_module_attach__open(); @@ -84,6 +90,23 @@ void test_module_attach(void) ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); + test_module_attach__detach(skel); + + /* attach fentry/fexit and make sure it get's module reference */ + link = bpf_program__attach(skel->progs.handle_fentry); + if (!ASSERT_OK_PTR(link, "attach_fentry")) + goto cleanup; + + ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + bpf_link__destroy(link); + + link = bpf_program__attach(skel->progs.handle_fexit); + if (!ASSERT_OK_PTR(link, "attach_fexit")) + goto cleanup; + + ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + bpf_link__destroy(link); + cleanup: test_module_attach__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 31a3114906e2..2535788e135f 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -68,10 +68,10 @@ static void test_ns_current_pid_tgid_new_ns(void) cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE, CLONE_NEWPID | SIGCHLD, NULL); - if (CHECK(cpid == -1, "clone", strerror(errno))) + if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno))) return; - if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno))) + if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno))) return; if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed")) diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c index 935a294f049a..131d7f7eeb42 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c @@ -2,12 +2,31 @@ #include <test_progs.h> #include <network_helpers.h> -void test_prog_run_xattr(void) +#include "test_pkt_access.skel.h" + +static const __u32 duration; + +static void check_run_cnt(int prog_fd, __u64 run_cnt) { - const char *file = "./test_pkt_access.o"; - struct bpf_object *obj; - char buf[10]; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int err; + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd)) + return; + + CHECK(run_cnt != info.run_cnt, "run_cnt", + "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt); +} + +void test_prog_run_xattr(void) +{ + struct test_pkt_access *skel; + int err, stats_fd = -1; + char buf[10] = {}; + __u64 run_cnt = 0; + struct bpf_prog_test_run_attr tattr = { .repeat = 1, .data_in = &pkt_v4, @@ -16,12 +35,15 @@ void test_prog_run_xattr(void) .data_size_out = 5, }; - err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, - &tattr.prog_fd); - if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME); + if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno)) return; - memset(buf, 0, sizeof(buf)); + skel = test_pkt_access__open_and_load(); + if (CHECK_ATTR(!skel, "open_and_load", "failed\n")) + goto cleanup; + + tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access); err = bpf_prog_test_run_xattr(&tattr); CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run", @@ -34,8 +56,12 @@ void test_prog_run_xattr(void) CHECK_ATTR(buf[5] != 0, "overflow", "BPF_PROG_TEST_RUN ignored size hint\n"); + run_cnt += tattr.repeat; + check_run_cnt(tattr.prog_fd, run_cnt); + tattr.data_out = NULL; tattr.data_size_out = 0; + tattr.repeat = 2; errno = 0; err = bpf_prog_test_run_xattr(&tattr); @@ -46,5 +72,12 @@ void test_prog_run_xattr(void) err = bpf_prog_test_run_xattr(&tattr); CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err); - bpf_object__close(obj); + run_cnt += tattr.repeat; + check_run_cnt(tattr.prog_fd, run_cnt); + +cleanup: + if (skel) + test_pkt_access__destroy(skel); + if (stats_fd != -1) + close(stats_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index 6ace5e9efec1..d3c2de2c24d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -160,11 +160,8 @@ int test_resolve_btfids(void) break; if (i > 0) { - ret = CHECK(test_set.ids[i - 1] > test_set.ids[i], - "sort_check", - "test_set is not sorted\n"); - if (ret) - break; + if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check")) + return -1; } } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index fddbc5db5d6a..de78617f6550 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -87,11 +87,20 @@ void test_ringbuf(void) pthread_t thread; long bg_ret = -1; int err, cnt; + int page_size = getpagesize(); - skel = test_ringbuf__open_and_load(); - if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n")) + skel = test_ringbuf__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + err = test_ringbuf__load(skel); + if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) + goto cleanup; + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); @@ -110,9 +119,9 @@ void test_ringbuf(void) CHECK(skel->bss->avail_data != 3 * rec_sz, "err_avail_size", "exp %ld, got %ld\n", 3L * rec_sz, skel->bss->avail_data); - CHECK(skel->bss->ring_size != 4096, + CHECK(skel->bss->ring_size != page_size, "err_ring_size", "exp %ld, got %ld\n", - 4096L, skel->bss->ring_size); + (long)page_size, skel->bss->ring_size); CHECK(skel->bss->cons_pos != 0, "err_cons_pos", "exp %ld, got %ld\n", 0L, skel->bss->cons_pos); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index d37161e59bb2..cef63e703924 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -41,13 +41,42 @@ static int process_sample(void *ctx, void *data, size_t len) void test_ringbuf_multi(void) { struct test_ringbuf_multi *skel; - struct ring_buffer *ringbuf; + struct ring_buffer *ringbuf = NULL; int err; + int page_size = getpagesize(); + int proto_fd = -1; - skel = test_ringbuf_multi__open_and_load(); - if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n")) + skel = test_ringbuf_multi__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size); + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + + proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0); + if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n")) + goto cleanup; + + err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd); + if (CHECK(err != 0, "bpf_map__set_inner_map_fd", "bpf_map__set_inner_map_fd failed\n")) + goto cleanup; + + err = test_ringbuf_multi__load(skel); + if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) + goto cleanup; + + close(proto_fd); + proto_fd = -1; + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); @@ -97,6 +126,8 @@ void test_ringbuf_multi(void) 2L, skel->bss->total); cleanup: + if (proto_fd >= 0) + close(proto_fd); ring_buffer__free(ringbuf); test_ringbuf_multi__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 9ff0412e1fd3..45c82db3c58c 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -241,6 +241,48 @@ fail: return -1; } +static __u64 socket_cookie(int fd) +{ + __u64 cookie; + socklen_t cookie_len = sizeof(cookie); + + if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0, + "getsockopt(SO_COOKIE)", "%s\n", strerror(errno))) + return 0; + return cookie; +} + +static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port, + const char *remote_ip, __u16 remote_port) +{ + void *local, *remote; + int err; + + memset(ctx, 0, sizeof(*ctx)); + ctx->local_port = local_port; + ctx->remote_port = htons(remote_port); + + if (is_ipv6(local_ip)) { + ctx->family = AF_INET6; + local = &ctx->local_ip6[0]; + remote = &ctx->remote_ip6[0]; + } else { + ctx->family = AF_INET; + local = &ctx->local_ip4; + remote = &ctx->remote_ip4; + } + + err = inet_pton(ctx->family, local_ip, local); + if (CHECK(err != 1, "inet_pton", "local_ip failed\n")) + return 1; + + err = inet_pton(ctx->family, remote_ip, remote); + if (CHECK(err != 1, "inet_pton", "remote_ip failed\n")) + return 1; + + return 0; +} + static int send_byte(int fd) { ssize_t n; @@ -1009,18 +1051,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel) static void run_sk_assign(struct test_sk_lookup *skel, struct bpf_program *lookup_prog, - const char *listen_ip, const char *connect_ip) + const char *remote_ip, const char *local_ip) { - int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 }; - struct bpf_link *lookup_link; + int server_fds[MAX_SERVERS] = { -1 }; + struct bpf_sk_lookup ctx; + __u64 server_cookie; int i, err; - lookup_link = attach_lookup_prog(lookup_prog); - if (!lookup_link) + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = &ctx, + .ctx_size_in = sizeof(ctx), + .ctx_out = &ctx, + .ctx_size_out = sizeof(ctx), + ); + + if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT)) return; + ctx.protocol = IPPROTO_TCP; + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { - server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL); + server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL); if (server_fds[i] < 0) goto close_servers; @@ -1030,23 +1081,25 @@ static void run_sk_assign(struct test_sk_lookup *skel, goto close_servers; } - client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT); - if (client_fd < 0) + server_cookie = socket_cookie(server_fds[SERVER_B]); + if (!server_cookie) + return; + + err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts); + if (CHECK(err, "test_run", "failed with error %d\n", errno)) + goto close_servers; + + if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n")) goto close_servers; - peer_fd = accept(server_fds[SERVER_B], NULL, NULL); - if (CHECK(peer_fd < 0, "accept", "failed\n")) - goto close_client; + CHECK(ctx.cookie != server_cookie, "ctx.cookie", + "selected sk %llu instead of %llu\n", ctx.cookie, server_cookie); - close(peer_fd); -close_client: - close(client_fd); close_servers: for (i = 0; i < ARRAY_SIZE(server_fds); i++) { if (server_fds[i] != -1) close(server_fds[i]); } - bpf_link__destroy(lookup_link); } static void run_sk_assign_v4(struct test_sk_lookup *skel, diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c new file mode 100644 index 000000000000..a958c22aec75 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Google LLC. */ + +#include <test_progs.h> +#include "test_snprintf.skel.h" +#include "test_snprintf_single.skel.h" + +#define EXP_NUM_OUT "-8 9 96 -424242 1337 DABBAD00" +#define EXP_NUM_RET sizeof(EXP_NUM_OUT) + +#define EXP_IP_OUT "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001" +#define EXP_IP_RET sizeof(EXP_IP_OUT) + +/* The third specifier, %pB, depends on compiler inlining so don't check it */ +#define EXP_SYM_OUT "schedule schedule+0x0/" +#define MIN_SYM_RET sizeof(EXP_SYM_OUT) + +/* The third specifier, %p, is a hashed pointer which changes on every reboot */ +#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 " +#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr") + +#define EXP_STR_OUT "str1 longstr" +#define EXP_STR_RET sizeof(EXP_STR_OUT) + +#define EXP_OVER_OUT "%over" +#define EXP_OVER_RET 10 + +#define EXP_PAD_OUT " 4 000" +#define EXP_PAD_RET 900007 + +#define EXP_NO_ARG_OUT "simple case" +#define EXP_NO_ARG_RET 12 + +#define EXP_NO_BUF_RET 29 + +void test_snprintf_positive(void) +{ + char exp_addr_out[] = EXP_ADDR_OUT; + char exp_sym_out[] = EXP_SYM_OUT; + struct test_snprintf *skel; + + skel = test_snprintf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach")) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out"); + ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret"); + + ASSERT_STREQ(skel->bss->ip_out, EXP_IP_OUT, "ip_out"); + ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret"); + + ASSERT_OK(memcmp(skel->bss->sym_out, exp_sym_out, + sizeof(exp_sym_out) - 1), "sym_out"); + ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret"); + + ASSERT_OK(memcmp(skel->bss->addr_out, exp_addr_out, + sizeof(exp_addr_out) - 1), "addr_out"); + ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret"); + + ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out"); + ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret"); + + ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out"); + ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret"); + + ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out"); + ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret"); + + ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out"); + ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret"); + + ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret"); + +cleanup: + test_snprintf__destroy(skel); +} + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */ +static int load_single_snprintf(char *fmt) +{ + struct test_snprintf_single *skel; + int ret; + + skel = test_snprintf_single__open(); + if (!skel) + return -EINVAL; + + memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10)); + + ret = test_snprintf_single__load(skel); + test_snprintf_single__destroy(skel); + + return ret; +} + +void test_snprintf_negative(void) +{ + ASSERT_OK(load_single_snprintf("valid %d"), "valid usage"); + + ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero"); + ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers"); + ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1"); + ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2"); + ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3"); + ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4"); + ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5"); + ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); + ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); +} + +void test_snprintf(void) +{ + if (test__start_subtest("snprintf_positive")) + test_snprintf_positive(); + if (test__start_subtest("snprintf_negative")) + test_snprintf_negative(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c index 686b40f11a45..76e1f5fe18fa 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -42,9 +42,7 @@ void test_snprintf_btf(void) * and it set expected return values from bpf_trace_printk()s * and all tests ran. */ - if (CHECK(bss->ret <= 0, - "bpf_snprintf_btf: got return value", - "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests)) + if (!ASSERT_GT(bss->ret, 0, "bpf_snprintf_ret")) goto cleanup; if (CHECK(bss->ran_subtests == 0, "check if subtests ran", diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index b8b48cac2ac3..ab77596b64e3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -7,6 +7,7 @@ #include "test_skmsg_load_helpers.skel.h" #include "test_sockmap_update.skel.h" #include "test_sockmap_invalid_update.skel.h" +#include "test_sockmap_skb_verdict_attach.skel.h" #include "bpf_iter_sockmap.skel.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -281,6 +282,39 @@ out: bpf_iter_sockmap__destroy(skel); } +static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first, + enum bpf_attach_type second) +{ + struct test_sockmap_skb_verdict_attach *skel; + int err, map, verdict; + + skel = test_sockmap_skb_verdict_attach__open_and_load(); + if (CHECK_FAIL(!skel)) { + perror("test_sockmap_skb_verdict_attach__open_and_load"); + return; + } + + verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + map = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(verdict, map, first, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach"); + goto out; + } + + err = bpf_prog_attach(verdict, map, second, 0); + assert(err == -1 && errno == EBUSY); + + err = bpf_prog_detach2(verdict, map, first); + if (CHECK_FAIL(err)) { + perror("bpf_prog_detach2"); + goto out; + } +out: + test_sockmap_skb_verdict_attach__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -301,4 +335,10 @@ void test_sockmap_basic(void) test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash copy")) test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap skb_verdict attach")) { + test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT, + BPF_SK_SKB_STREAM_VERDICT); + test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT, + BPF_SK_SKB_VERDICT); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index d7d65a700799..648d9ae898d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, struct bpf_map *inner_map, int family, int sotype) { - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int parser = bpf_program__fd(skel->progs.prog_skb_parser); + int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); + int parser = bpf_program__fd(skel->progs.prog_stream_parser); int verdict_map = bpf_map__fd(skel->maps.verdict_map); int sock_map = bpf_map__fd(inner_map); int err; @@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel, struct bpf_map *inner_map, int family, int sotype) { - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int parser = bpf_program__fd(skel->progs.prog_skb_parser); + int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); + int parser = bpf_program__fd(skel->progs.prog_stream_parser); int verdict_map = bpf_map__fd(skel->maps.verdict_map); int sock_map = bpf_map__fd(inner_map); int err; @@ -1603,6 +1603,141 @@ static void test_reuseport(struct test_sockmap_listen *skel, } } +static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + const char *log_prefix = redir_mode_str(mode); + struct sockaddr_storage addr; + int c0, c1, p0, p1; + unsigned int pass; + socklen_t len; + int err, n; + u64 value; + u32 key; + char b; + + zero_verdict_count(verd_mapfd); + + p0 = socket_loopback(family, sotype | SOCK_NONBLOCK); + if (p0 < 0) + return; + len = sizeof(addr); + err = xgetsockname(p0, sockaddr(&addr), &len); + if (err) + goto close_peer0; + + c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0); + if (c0 < 0) + goto close_peer0; + err = xconnect(c0, sockaddr(&addr), len); + if (err) + goto close_cli0; + err = xgetsockname(c0, sockaddr(&addr), &len); + if (err) + goto close_cli0; + err = xconnect(p0, sockaddr(&addr), len); + if (err) + goto close_cli0; + + p1 = socket_loopback(family, sotype | SOCK_NONBLOCK); + if (p1 < 0) + goto close_cli0; + err = xgetsockname(p1, sockaddr(&addr), &len); + if (err) + goto close_cli0; + + c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0); + if (c1 < 0) + goto close_peer1; + err = xconnect(c1, sockaddr(&addr), len); + if (err) + goto close_cli1; + err = xgetsockname(c1, sockaddr(&addr), &len); + if (err) + goto close_cli1; + err = xconnect(p1, sockaddr(&addr), len); + if (err) + goto close_cli1; + + key = 0; + value = p0; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_cli1; + + key = 1; + value = p1; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + goto close_cli1; + + n = write(c1, "a", 1); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto close_cli1; + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto close_cli1; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); + + n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); + if (n < 0) + FAIL_ERRNO("%s: read", log_prefix); + if (n == 0) + FAIL("%s: incomplete read", log_prefix); + +close_cli1: + xclose(c1); +close_peer1: + xclose(p1); +close_cli0: + xclose(c0); +close_peer0: + xclose(p0); +} + +static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); + if (err) + return; + + skel->bss->test_ingress = false; + udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, + REDIR_EGRESS); + skel->bss->test_ingress = true; + udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, + REDIR_INGRESS); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, + int family) +{ + const char *family_name, *map_name; + char s[MAX_TEST_NAME]; + + family_name = family_str(family); + map_name = map_type_str(map); + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); + if (!test__start_subtest(s)) + return; + udp_skb_redir_to_connected(skel, map, family); +} + static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, int family) { @@ -1611,6 +1746,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_redir(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_DGRAM); + test_udp_redir(skel, map, family); } void test_sockmap_listen(void) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index d5b44b135c00..4b937e5dbaca 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -3,6 +3,7 @@ #include "cgroup_helpers.h" #include <linux/tcp.h> +#include "sockopt_sk.skel.h" #ifndef SOL_TCP #define SOL_TCP IPPROTO_TCP @@ -191,60 +192,30 @@ err: return -1; } -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) +static void run_test(int cgroup_fd) { - enum bpf_attach_type attach_type; - enum bpf_prog_type prog_type; - struct bpf_program *prog; - int err; + struct sockopt_sk *skel; - err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); - if (err) { - log_err("Failed to deduct types for %s BPF program", title); - return -1; - } + skel = sockopt_sk__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; - prog = bpf_object__find_program_by_title(obj, title); - if (!prog) { - log_err("Failed to find %s BPF program", title); - return -1; - } - - err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, - attach_type, 0); - if (err) { - log_err("Failed to attach %s BPF program", title); - return -1; - } - - return 0; -} - -static void run_test(int cgroup_fd) -{ - struct bpf_prog_load_attr attr = { - .file = "./sockopt_sk.o", - }; - struct bpf_object *obj; - int ignored; - int err; - - err = bpf_prog_load_xattr(&attr, &obj, &ignored); - if (CHECK_FAIL(err)) - return; + skel->bss->page_size = getpagesize(); - err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); - if (CHECK_FAIL(err)) - goto close_bpf_object; + skel->links._setsockopt = + bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link")) + goto cleanup; - err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); - if (CHECK_FAIL(err)) - goto close_bpf_object; + skel->links._getsockopt = + bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link")) + goto cleanup; - CHECK_FAIL(getsetsockopt()); + ASSERT_OK(getsetsockopt(), "getsetsockopt"); -close_bpf_object: - bpf_object__close(obj); +cleanup: + sockopt_sk__destroy(skel); } void test_sockopt_sk(void) diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c new file mode 100644 index 000000000000..46556976dccc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <test_progs.h> +#include "test_static_linked.skel.h" + +void test_static_linked(void) +{ + int err; + struct test_static_linked* skel; + + skel = test_static_linked__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->rodata->rovar1 = 1; + skel->bss->static_var1 = 2; + skel->bss->static_var11 = 3; + + skel->rodata->rovar2 = 4; + skel->bss->static_var2 = 5; + skel->bss->static_var22 = 6; + + err = test_static_linked__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + err = test_static_linked__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger */ + usleep(1); + + ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1"); + ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2"); + +cleanup: + test_static_linked__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c new file mode 100644 index 000000000000..035c263aab1b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <unistd.h> +#include <sys/syscall.h> /* For SYS_xxx definitions */ +#include <sys/types.h> +#include <test_progs.h> +#include "task_local_storage.skel.h" +#include "task_local_storage_exit_creds.skel.h" +#include "task_ls_recursion.skel.h" + +static void test_sys_enter_exit(void) +{ + struct task_local_storage *skel; + int err; + + skel = task_local_storage__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + skel->bss->target_pid = syscall(SYS_gettid); + + err = task_local_storage__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_gettid); + syscall(SYS_gettid); + + /* 3x syscalls: 1x attach and 2x gettid */ + ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); + ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt"); + ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt"); +out: + task_local_storage__destroy(skel); +} + +static void test_exit_creds(void) +{ + struct task_local_storage_exit_creds *skel; + int err; + + skel = task_local_storage_exit_creds__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = task_local_storage_exit_creds__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + /* trigger at least one exit_creds() */ + if (CHECK_FAIL(system("ls > /dev/null"))) + goto out; + + /* sync rcu to make sure exit_creds() is called for "ls" */ + kern_sync_rcu(); + ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count"); + ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count"); +out: + task_local_storage_exit_creds__destroy(skel); +} + +static void test_recursion(void) +{ + struct task_ls_recursion *skel; + int err; + + skel = task_ls_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = task_ls_recursion__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + /* trigger sys_enter, make sure it does not cause deadlock */ + syscall(SYS_gettid); + +out: + task_ls_recursion__destroy(skel); +} + +void test_task_local_storage(void) +{ + if (test__start_subtest("sys_enter_exit")) + test_sys_enter_exit(); + if (test__start_subtest("exit_creds")) + test_exit_creds(); + if (test__start_subtest("recursion")) + test_recursion(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index b54bc0c351b7..0252f61d611a 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -68,7 +68,8 @@ void test_test_ima(void) goto close_prog; snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir); - if (CHECK_FAIL(system(cmd))) + err = system(cmd); + if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno)) goto close_clean; err = run_measured_process(measured_dir, &skel->bss->monitored_pid); @@ -81,7 +82,8 @@ void test_test_ima(void) close_clean: snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir); - CHECK_FAIL(system(cmd)); + err = system(cmd); + CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno); close_prog: ima__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index 2755e4f81499..244c01125126 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -51,43 +51,64 @@ int exec_cmd(int *monitored_pid) return -EINVAL; } -void test_test_lsm(void) +static int test_lsm(struct lsm *skel) { - struct lsm *skel = NULL; - int err, duration = 0; + struct bpf_link *link; int buf = 1234; - - skel = lsm__open_and_load(); - if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) - goto close_prog; + int err; err = lsm__attach(skel); - if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) - goto close_prog; + if (!ASSERT_OK(err, "attach")) + return err; + + /* Check that already linked program can't be attached again. */ + link = bpf_program__attach(skel->progs.test_int_hook); + if (!ASSERT_ERR_PTR(link, "attach_link")) + return -1; err = exec_cmd(&skel->bss->monitored_pid); - if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) - goto close_prog; + if (!ASSERT_OK(err, "exec_cmd")) + return err; - CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n", - skel->bss->bprm_count); + ASSERT_EQ(skel->bss->bprm_count, 1, "bprm_count"); skel->bss->monitored_pid = getpid(); err = stack_mprotect(); - if (CHECK(errno != EPERM, "stack_mprotect", "want err=EPERM, got %d\n", - errno)) - goto close_prog; + if (!ASSERT_EQ(errno, EPERM, "stack_mprotect")) + return err; - CHECK(skel->bss->mprotect_count != 1, "mprotect_count", - "mprotect_count = %d\n", skel->bss->mprotect_count); + ASSERT_EQ(skel->bss->mprotect_count, 1, "mprotect_count"); syscall(__NR_setdomainname, &buf, -2L); syscall(__NR_setdomainname, 0, -3L); syscall(__NR_setdomainname, ~0L, -4L); - CHECK(skel->bss->copy_test != 3, "copy_test", - "copy_test = %d\n", skel->bss->copy_test); + ASSERT_EQ(skel->bss->copy_test, 3, "copy_test"); + + lsm__detach(skel); + + skel->bss->copy_test = 0; + skel->bss->bprm_count = 0; + skel->bss->mprotect_count = 0; + return 0; +} + +void test_test_lsm(void) +{ + struct lsm *skel = NULL; + int err; + + skel = lsm__open_and_load(); + if (!ASSERT_OK_PTR(skel, "lsm_skel_load")) + goto close_prog; + + err = test_lsm(skel); + if (!ASSERT_OK(err, "test_lsm_first_attach")) + goto close_prog; + + err = test_lsm(skel); + ASSERT_OK(err, "test_lsm_second_attach"); close_prog: lsm__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c index 115a3b0ad984..474c6a62078a 100644 --- a/tools/testing/selftests/bpf/progs/bind4_prog.c +++ b/tools/testing/selftests/bpf/progs/bind4_prog.c @@ -57,6 +57,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) return 0; } +static __inline int bind_reuseport(struct bpf_sock_addr *ctx) +{ + int val = 1; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT, + &val, sizeof(val))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT, + &val, sizeof(val)) || !val) + return 1; + val = 0; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT, + &val, sizeof(val))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT, + &val, sizeof(val)) || val) + return 1; + + return 0; +} + static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt) { int old, tmp, new = 0xeb9f; @@ -127,6 +148,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx) if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY)) return 0; + /* Set reuseport and unset */ + if (bind_reuseport(ctx)) + return 0; + ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP); ctx->user_port = bpf_htons(SERV4_REWRITE_PORT); diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c index 4c0d348034b9..c19cfa869f30 100644 --- a/tools/testing/selftests/bpf/progs/bind6_prog.c +++ b/tools/testing/selftests/bpf/progs/bind6_prog.c @@ -63,6 +63,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) return 0; } +static __inline int bind_reuseport(struct bpf_sock_addr *ctx) +{ + int val = 1; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT, + &val, sizeof(val))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT, + &val, sizeof(val)) || !val) + return 1; + val = 0; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT, + &val, sizeof(val))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT, + &val, sizeof(val)) || val) + return 1; + + return 0; +} + static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt) { int old, tmp, new = 0xeb9f; @@ -141,6 +162,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx) if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY)) return 0; + /* Set reuseport and unset */ + if (bind_reuseport(ctx)) + return 0; + ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0); ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1); ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2); diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index 6939bfd8690f..f62df4d023f9 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -174,8 +174,8 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk) * as long as it is used in one of the func ptr * under SEC(".struct_ops"). */ -SEC("struct_ops/bictcp_init") -void BPF_PROG(bictcp_init, struct sock *sk) +SEC("struct_ops/bpf_cubic_init") +void BPF_PROG(bpf_cubic_init, struct sock *sk) { struct bictcp *ca = inet_csk_ca(sk); @@ -192,7 +192,7 @@ void BPF_PROG(bictcp_init, struct sock *sk) * The remaining tcp-cubic functions have an easier way. */ SEC("no-sec-prefix-bictcp_cwnd_event") -void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event) +void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { struct bictcp *ca = inet_csk_ca(sk); @@ -384,7 +384,7 @@ tcp_friendliness: } /* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */ -void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); @@ -403,7 +403,7 @@ void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) tcp_cong_avoid_ai(tp, ca->cnt, acked); } -__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk) +__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); @@ -420,7 +420,7 @@ __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk) return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state) +void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state) { if (new_state == TCP_CA_Loss) { bictcp_reset(inet_csk_ca(sk)); @@ -496,7 +496,7 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay) } } -void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk, +void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample) { const struct tcp_sock *tp = tcp_sk(sk); @@ -525,21 +525,21 @@ void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk, hystart_update(sk, delay); } -__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); +extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; - return max(tp->snd_cwnd, tp->prior_cwnd); +__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk) +{ + return tcp_reno_undo_cwnd(sk); } SEC(".struct_ops") struct tcp_congestion_ops cubic = { - .init = (void *)bictcp_init, - .ssthresh = (void *)bictcp_recalc_ssthresh, - .cong_avoid = (void *)bictcp_cong_avoid, - .set_state = (void *)bictcp_state, - .undo_cwnd = (void *)tcp_reno_undo_cwnd, - .cwnd_event = (void *)bictcp_cwnd_event, - .pkts_acked = (void *)bictcp_acked, + .init = (void *)bpf_cubic_init, + .ssthresh = (void *)bpf_cubic_recalc_ssthresh, + .cong_avoid = (void *)bpf_cubic_cong_avoid, + .set_state = (void *)bpf_cubic_state, + .undo_cwnd = (void *)bpf_cubic_undo_cwnd, + .cwnd_event = (void *)bpf_cubic_cwnd_event, + .pkts_acked = (void *)bpf_cubic_acked, .name = "bpf_cubic", }; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 4dc1a967776a..fd42247da8b4 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -194,22 +194,12 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); } -SEC("struct_ops/tcp_reno_cong_avoid") -void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (!tcp_is_cwnd_limited(sk)) - return; +extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; - /* In "safe" area, increase. */ - if (tcp_in_slow_start(tp)) { - acked = tcp_slow_start(tp, acked); - if (!acked) - return; - } - /* In dangerous area, increase slowly. */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); +SEC("struct_ops/dctcp_reno_cong_avoid") +void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +{ + tcp_reno_cong_avoid(sk, ack, acked); } SEC(".struct_ops") @@ -226,7 +216,7 @@ struct tcp_congestion_ops dctcp = { .in_ack_event = (void *)dctcp_update_alpha, .cwnd_event = (void *)dctcp_cwnd_event, .ssthresh = (void *)dctcp_ssthresh, - .cong_avoid = (void *)tcp_reno_cong_avoid, + .cong_avoid = (void *)dctcp_cong_avoid, .undo_cwnd = (void *)dctcp_cwnd_undo, .set_state = (void *)dctcp_state, .flags = TCP_CONG_NEEDS_ECN, diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index 50e59a2e142e..43c36f5f7649 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -35,3 +35,30 @@ int dump_task_stack(struct bpf_iter__task *ctx) return 0; } + +SEC("iter/task") +int get_task_user_stacks(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + uint64_t buf_sz = 0; + int64_t res; + + if (task == (void *)0) + return 0; + + res = bpf_get_task_stack(task, entries, + MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, BPF_F_USER_STACK); + if (res <= 0) + return 0; + + buf_sz += res; + + /* If the verifier doesn't refine bpf_get_task_stack res, and instead + * assumes res is entirely unknown, this program will fail to load as + * the verifier will believe that max buf_sz value allows reading + * past the end of entries in bpf_seq_write call + */ + bpf_seq_write(seq, &entries, buf_sz); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c new file mode 100644 index 000000000000..2ecd833dcd41 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "X"; + +void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk) +{ +} + +SEC(".struct_ops") +struct tcp_congestion_ops bpf_nogpltcp = { + .init = (void *)nogpltcp_init, + .name = "bpf_nogpltcp", +}; diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c deleted file mode 100644 index dd0ffa518f36..000000000000 --- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "core_reloc_types.h" - -void f(struct core_reloc_existence___err_wrong_arr_kind x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c deleted file mode 100644 index bc83372088ad..000000000000 --- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "core_reloc_types.h" - -void f(struct core_reloc_existence___err_wrong_arr_value_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c deleted file mode 100644 index 917bec41be08..000000000000 --- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "core_reloc_types.h" - -void f(struct core_reloc_existence___err_wrong_int_kind x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c deleted file mode 100644 index 6ec7e6ec1c91..000000000000 --- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "core_reloc_types.h" - -void f(struct core_reloc_existence___err_wrong_int_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c deleted file mode 100644 index 7bbcacf2b0d1..000000000000 --- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "core_reloc_types.h" - -void f(struct core_reloc_existence___err_wrong_int_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c deleted file mode 100644 index f384dd38ec70..000000000000 --- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c +++ /dev/null @@ -1,3 +0,0 @@ -#include "core_reloc_types.h" - -void f(struct core_reloc_existence___err_wrong_struct_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c new file mode 100644 index 000000000000..d14b496190c3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___wrong_field_defs x) {} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 31975c96e2c9..8aaa24a00322 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -174,6 +174,12 @@ struct struct_in_struct { }; }; +struct struct_in_array {}; + +struct struct_in_array_typed {}; + +typedef struct struct_in_array_typed struct_in_array_t[2]; + struct struct_with_embedded_stuff { int a; struct { @@ -203,6 +209,14 @@ struct struct_with_embedded_stuff { } r[5]; struct struct_in_struct s[10]; int t[11]; + struct struct_in_array (*u)[2]; + struct_in_array_t *v; +}; + +struct float_struct { + float f; + const double *d; + volatile long double *ld; }; struct root_struct { @@ -219,6 +233,7 @@ struct root_struct { union_fwd_t *_12; union_fwd_ptr_t _13; struct struct_with_embedded_stuff _14; + struct float_struct _15; }; /* ------ END-EXPECTED-OUTPUT ------ */ diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 9a2850850121..c95c0cabe951 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -700,27 +700,11 @@ struct core_reloc_existence___minimal { int a; }; -struct core_reloc_existence___err_wrong_int_sz { - short a; -}; - -struct core_reloc_existence___err_wrong_int_type { +struct core_reloc_existence___wrong_field_defs { + void *a; int b[1]; -}; - -struct core_reloc_existence___err_wrong_int_kind { struct{ int x; } c; -}; - -struct core_reloc_existence___err_wrong_arr_kind { int arr; -}; - -struct core_reloc_existence___err_wrong_arr_value_type { - short arr[1]; -}; - -struct core_reloc_existence___err_wrong_struct_type { int s; }; @@ -807,6 +791,7 @@ struct core_reloc_size_output { int arr_elem_sz; int ptr_sz; int enum_sz; + int float_sz; }; struct core_reloc_size { @@ -816,6 +801,7 @@ struct core_reloc_size { int arr_field[4]; void *ptr_field; enum { VALUE = 123 } enum_field; + float float_field; }; struct core_reloc_size___diff_sz { @@ -825,6 +811,7 @@ struct core_reloc_size___diff_sz { char arr_field[10]; void *ptr_field; enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field; + double float_field; }; /* Error case of two candidates with the fields (int_field) at the same @@ -839,6 +826,7 @@ struct core_reloc_size___err_ambiguous1 { int arr_field[4]; void *ptr_field; enum { VALUE___1 = 123 } enum_field; + float float_field; }; struct core_reloc_size___err_ambiguous2 { @@ -850,6 +838,7 @@ struct core_reloc_size___err_ambiguous2 { int arr_field[4]; void *ptr_field; enum { VALUE___2 = 123 } enum_field; + float float_field; }; /* diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c index 5f645fdaba6f..52a550d281d9 100644 --- a/tools/testing/selftests/bpf/progs/fentry_test.c +++ b/tools/testing/selftests/bpf/progs/fentry_test.c @@ -64,7 +64,7 @@ __u64 test7_result = 0; SEC("fentry/bpf_fentry_test7") int BPF_PROG(test7, struct bpf_fentry_test_t *arg) { - if (arg == 0) + if (!arg) test7_result = 1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c new file mode 100644 index 000000000000..03a672d76353 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char LICENSE[] SEC("license") = "GPL"; + +int pid = 0; +int fentry_cnt = 0; +int fexit_cnt = 0; + +SEC("fentry/__x64_sys_nanosleep") +int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) +{ + if ((int)bpf_get_current_pid_tgid() != pid) + return 0; + + fentry_cnt++; + return 0; +} + +SEC("fexit/__x64_sys_nanosleep") +int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) +{ + if ((int)bpf_get_current_pid_tgid() != pid) + return 0; + + fexit_cnt++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c index 0952affb22a6..8f1ccb7302e1 100644 --- a/tools/testing/selftests/bpf/progs/fexit_test.c +++ b/tools/testing/selftests/bpf/progs/fexit_test.c @@ -65,7 +65,7 @@ __u64 test7_result = 0; SEC("fexit/bpf_fentry_test7") int BPF_PROG(test7, struct bpf_fentry_test_t *arg) { - if (arg == 0) + if (!arg) test7_result = 1; return 0; } @@ -74,7 +74,7 @@ __u64 test8_result = 0; SEC("fexit/bpf_fentry_test8") int BPF_PROG(test8, struct bpf_fentry_test_t *arg) { - if (arg->a == 0) + if (!arg->a) test8_result = 1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c new file mode 100644 index 000000000000..75e8e1069fe7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u64); +} arraymap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} percpu_map SEC(".maps"); + +struct callback_ctx { + int output; +}; + +static __u64 +check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val, + struct callback_ctx *data) +{ + data->output += *val; + if (*key == 1) + return 1; /* stop the iteration */ + return 0; +} + +__u32 cpu = 0; +__u64 percpu_val = 0; + +static __u64 +check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val, + struct callback_ctx *data) +{ + cpu = bpf_get_smp_processor_id(); + percpu_val = *val; + return 0; +} + +u32 arraymap_output = 0; + +SEC("classifier") +int test_pkt_access(struct __sk_buff *skb) +{ + struct callback_ctx data; + + data.output = 0; + bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0); + arraymap_output = data.output; + + bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c new file mode 100644 index 000000000000..913dd91aafff --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u64); +} hashmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} percpu_map SEC(".maps"); + +struct callback_ctx { + struct __sk_buff *ctx; + int input; + int output; +}; + +static __u64 +check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val, + struct callback_ctx *data) +{ + struct __sk_buff *skb = data->ctx; + __u32 k; + __u64 v; + + if (skb) { + k = *key; + v = *val; + if (skb->len == 10000 && k == 10 && v == 10) + data->output = 3; /* impossible path */ + else + data->output = 4; + } else { + data->output = data->input; + bpf_map_delete_elem(map, key); + } + + return 0; +} + +__u32 cpu = 0; +__u32 percpu_called = 0; +__u32 percpu_key = 0; +__u64 percpu_val = 0; +int percpu_output = 0; + +static __u64 +check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val, + struct callback_ctx *unused) +{ + struct callback_ctx data; + + percpu_called++; + cpu = bpf_get_smp_processor_id(); + percpu_key = *key; + percpu_val = *val; + + data.ctx = 0; + data.input = 100; + data.output = 0; + bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0); + percpu_output = data.output; + + return 0; +} + +int hashmap_output = 0; +int hashmap_elems = 0; +int percpu_map_elems = 0; + +SEC("classifier") +int test_pkt_access(struct __sk_buff *skb) +{ + struct callback_ctx data; + + data.ctx = skb; + data.input = 10; + data.output = 0; + hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0); + hashmap_output = data.output; + + percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem, + (void *)0, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c new file mode 100644 index 000000000000..470f8723e463 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; +extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, + __u32 c, __u64 d) __ksym; + +SEC("classifier") +int kfunc_call_test2(struct __sk_buff *skb) +{ + struct bpf_sock *sk = skb->sk; + + if (!sk) + return -1; + + sk = bpf_sk_fullsock(sk); + if (!sk) + return -1; + + return bpf_kfunc_call_test2((struct sock *)sk, 1, 2); +} + +SEC("classifier") +int kfunc_call_test1(struct __sk_buff *skb) +{ + struct bpf_sock *sk = skb->sk; + __u64 a = 1ULL << 32; + __u32 ret; + + if (!sk) + return -1; + + sk = bpf_sk_fullsock(sk); + if (!sk) + return -1; + + a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4); + ret = a >> 32; /* ret should be 2 */ + ret += (__u32)a; /* ret should be 12 */ + + return ret; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c new file mode 100644 index 000000000000..b2dcb7d9cb03 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +extern const int bpf_prog_active __ksym; +extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, + __u32 c, __u64 d) __ksym; +extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; +int active_res = -1; +int sk_state = -1; + +int __noinline f1(struct __sk_buff *skb) +{ + struct bpf_sock *sk = skb->sk; + int *active; + + if (!sk) + return -1; + + sk = bpf_sk_fullsock(sk); + if (!sk) + return -1; + + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, + bpf_get_smp_processor_id()); + if (active) + active_res = *active; + + sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; + + return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); +} + +SEC("classifier") +int kfunc_call_test1(struct __sk_buff *skb) +{ + return f1(skb); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c new file mode 100644 index 000000000000..b964ec1390c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* weak and shared between two files */ +const volatile int my_tid __weak; +long syscall_id __weak; + +int output_val1; +int output_ctx1; +int output_weak1; + +/* same "subprog" name in all files, but it's ok because they all are static */ +static __noinline int subprog(int x) +{ + /* but different formula */ + return x * 1; +} + +/* Global functions can't be void */ +int set_output_val1(int x) +{ + output_val1 = x + subprog(x); + return x; +} + +/* This function can't be verified as global, as it assumes raw_tp/sys_enter + * context and accesses syscall id (second argument). So we mark it as + * __hidden, so that libbpf will mark it as static in the final object file, + * right before verifying it in the kernel. + * + * But we don't mark it as __hidden here, rather at extern site. __hidden is + * "contaminating" visibility, so it will get propagated from either extern or + * actual definition (including from the losing __weak definition). + */ +void set_output_ctx1(__u64 *ctx) +{ + output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */ +} + +/* this weak instance should win because it's the first one */ +__weak int set_output_weak(int x) +{ + output_weak1 = x; + return x; +} + +extern int set_output_val2(int x); + +/* here we'll force set_output_ctx2() to be __hidden in the final obj file */ +__hidden extern void set_output_ctx2(__u64 *ctx); + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler1, struct pt_regs *regs, long id) +{ + if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) + return 0; + + set_output_val2(1000); + set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */ + + /* keep input value the same across both files to avoid dependency on + * handler call order; differentiate by output_weak1 vs output_weak2. + */ + set_output_weak(42); + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c new file mode 100644 index 000000000000..575e958e60b7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* weak and shared between both files */ +const volatile int my_tid __weak; +long syscall_id __weak; + +int output_val2; +int output_ctx2; +int output_weak2; /* should stay zero */ + +/* same "subprog" name in all files, but it's ok because they all are static */ +static __noinline int subprog(int x) +{ + /* but different formula */ + return x * 2; +} + +/* Global functions can't be void */ +int set_output_val2(int x) +{ + output_val2 = 2 * x + 2 * subprog(x); + return 2 * x; +} + +/* This function can't be verified as global, as it assumes raw_tp/sys_enter + * context and accesses syscall id (second argument). So we mark it as + * __hidden, so that libbpf will mark it as static in the final object file, + * right before verifying it in the kernel. + * + * But we don't mark it as __hidden here, rather at extern site. __hidden is + * "contaminating" visibility, so it will get propagated from either extern or + * actual definition (including from the losing __weak definition). + */ +void set_output_ctx2(__u64 *ctx) +{ + output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */ +} + +/* this weak instance should lose, because it will be processed second */ +__weak int set_output_weak(int x) +{ + output_weak2 = x; + return 2 * x; +} + +extern int set_output_val1(int x); + +/* here we'll force set_output_ctx1() to be __hidden in the final obj file */ +__hidden extern void set_output_ctx1(__u64 *ctx); + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler2, struct pt_regs *regs, long id) +{ + if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) + return 0; + + set_output_val1(2000); + set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */ + + /* keep input value the same across both files to avoid dependency on + * handler call order; differentiate by output_weak1 vs output_weak2. + */ + set_output_weak(42); + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c new file mode 100644 index 000000000000..52291515cc72 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_maps1.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct my_key { long x; }; +struct my_value { long x; }; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct my_key); + __type(value, struct my_value); + __uint(max_entries, 16); +} map1 SEC(".maps"); + + /* Matches map2 definition in linked_maps2.c. Order of the attributes doesn't + * matter. + */ +typedef struct { + __uint(max_entries, 8); + __type(key, int); + __type(value, int); + __uint(type, BPF_MAP_TYPE_ARRAY); +} map2_t; + +extern map2_t map2 SEC(".maps"); + +/* This should be the winning map definition, but we have no way of verifying, + * so we just make sure that it links and works without errors + */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 16); +} map_weak __weak SEC(".maps"); + +int output_first1; +int output_second1; +int output_weak1; + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler_enter1) +{ + /* update values with key = 1 */ + int key = 1, val = 1; + struct my_key key_struct = { .x = 1 }; + struct my_value val_struct = { .x = 1000 }; + + bpf_map_update_elem(&map1, &key_struct, &val_struct, 0); + bpf_map_update_elem(&map2, &key, &val, 0); + bpf_map_update_elem(&map_weak, &key, &val, 0); + + return 0; +} + +SEC("raw_tp/sys_exit") +int BPF_PROG(handler_exit1) +{ + /* lookup values with key = 2, set in another file */ + int key = 2, *val; + struct my_key key_struct = { .x = 2 }; + struct my_value *value_struct; + + value_struct = bpf_map_lookup_elem(&map1, &key_struct); + if (value_struct) + output_first1 = value_struct->x; + + val = bpf_map_lookup_elem(&map2, &key); + if (val) + output_second1 = *val; + + val = bpf_map_lookup_elem(&map_weak, &key); + if (val) + output_weak1 = *val; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c new file mode 100644 index 000000000000..0693687474ed --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_maps2.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* modifiers and typedefs are ignored when comparing key/value types */ +typedef struct my_key { long x; } key_type; +typedef struct my_value { long x; } value_type; + +extern struct { + __uint(max_entries, 16); + __type(key, key_type); + __type(value, value_type); + __uint(type, BPF_MAP_TYPE_HASH); +} map1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 8); +} map2 SEC(".maps"); + +/* this definition will lose, but it has to exactly match the winner */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 16); +} map_weak __weak SEC(".maps"); + +int output_first2; +int output_second2; +int output_weak2; + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler_enter2) +{ + /* update values with key = 2 */ + int key = 2, val = 2; + key_type key_struct = { .x = 2 }; + value_type val_struct = { .x = 2000 }; + + bpf_map_update_elem(&map1, &key_struct, &val_struct, 0); + bpf_map_update_elem(&map2, &key, &val, 0); + bpf_map_update_elem(&map_weak, &key, &val, 0); + + return 0; +} + +SEC("raw_tp/sys_exit") +int BPF_PROG(handler_exit2) +{ + /* lookup values with key = 1, set in another file */ + int key = 1, *val; + key_type key_struct = { .x = 1 }; + value_type *value_struct; + + value_struct = bpf_map_lookup_elem(&map1, &key_struct); + if (value_struct) + output_first2 = value_struct->x; + + val = bpf_map_lookup_elem(&map2, &key); + if (val) + output_second2 = *val; + + val = bpf_map_lookup_elem(&map_weak, &key); + if (val) + output_weak2 = *val; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c new file mode 100644 index 000000000000..ef9e9d0bb0ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_vars1.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern int LINUX_KERNEL_VERSION __kconfig; +/* this weak extern will be strict due to the other file's strong extern */ +extern bool CONFIG_BPF_SYSCALL __kconfig __weak; +extern const void bpf_link_fops __ksym __weak; + +int input_bss1; +int input_data1 = 1; +const volatile int input_rodata1 = 11; + +int input_bss_weak __weak; +/* these two definitions should win */ +int input_data_weak __weak = 10; +const volatile int input_rodata_weak __weak = 100; + +extern int input_bss2; +extern int input_data2; +extern const int input_rodata2; + +int output_bss1; +int output_data1; +int output_rodata1; + +long output_sink1; + +static __noinline int get_bss_res(void) +{ + /* just make sure all the relocations work against .text as well */ + return input_bss1 + input_bss2 + input_bss_weak; +} + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler1) +{ + output_bss1 = get_bss_res(); + output_data1 = input_data1 + input_data2 + input_data_weak; + output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak; + + /* make sure we actually use above special externs, otherwise compiler + * will optimize them out + */ + output_sink1 = LINUX_KERNEL_VERSION + + CONFIG_BPF_SYSCALL + + (long)&bpf_link_fops; + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c new file mode 100644 index 000000000000..e4f5bd388a3c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_vars2.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern int LINUX_KERNEL_VERSION __kconfig; +/* when an extern is defined as both strong and weak, resulting symbol will be strong */ +extern bool CONFIG_BPF_SYSCALL __kconfig; +extern const void __start_BTF __ksym; + +int input_bss2; +int input_data2 = 2; +const volatile int input_rodata2 = 22; + +int input_bss_weak __weak; +/* these two weak variables should lose */ +int input_data_weak __weak = 20; +const volatile int input_rodata_weak __weak = 200; + +extern int input_bss1; +extern int input_data1; +extern const int input_rodata1; + +int output_bss2; +int output_data2; +int output_rodata2; + +int output_sink2; + +static __noinline int get_data_res(void) +{ + /* just make sure all the relocations work against .text as well */ + return input_data1 + input_data2 + input_data_weak; +} + +SEC("raw_tp/sys_enter") +int BPF_PROG(handler2) +{ + output_bss2 = input_bss1 + input_bss2 + input_bss_weak; + output_data2 = get_data_res(); + output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak; + + /* make sure we actually use above special externs, otherwise compiler + * will optimize them out + */ + output_sink2 = LINUX_KERNEL_VERSION + + CONFIG_BPF_SYSCALL + + (long)&__start_BTF; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c new file mode 100644 index 000000000000..38de0331e6b4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/loop6.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/ptrace.h> +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +/* typically virtio scsi has max SGs of 6 */ +#define VIRTIO_MAX_SGS 6 + +/* Verifier will fail with SG_MAX = 128. The failure can be + * workarounded with a smaller SG_MAX, e.g. 10. + */ +#define WORKAROUND +#ifdef WORKAROUND +#define SG_MAX 10 +#else +/* typically virtio blk has max SEG of 128 */ +#define SG_MAX 128 +#endif + +#define SG_CHAIN 0x01UL +#define SG_END 0x02UL + +struct scatterlist { + unsigned long page_link; + unsigned int offset; + unsigned int length; +}; + +#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN) +#define sg_is_last(sg) ((sg)->page_link & SG_END) +#define sg_chain_ptr(sg) \ + ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END))) + +static inline struct scatterlist *__sg_next(struct scatterlist *sgp) +{ + struct scatterlist sg; + + bpf_probe_read_kernel(&sg, sizeof(sg), sgp); + if (sg_is_last(&sg)) + return NULL; + + sgp++; + + bpf_probe_read_kernel(&sg, sizeof(sg), sgp); + if (sg_is_chain(&sg)) + sgp = sg_chain_ptr(&sg); + + return sgp; +} + +static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i) +{ + struct scatterlist *sgp; + + bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i); + return sgp; +} + +int config = 0; +int result = 0; + +SEC("kprobe/virtqueue_add_sgs") +int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, + unsigned int out_sgs, unsigned int in_sgs) +{ + struct scatterlist *sgp = NULL; + __u64 length1 = 0, length2 = 0; + unsigned int i, n, len; + + if (config != 0) + return 0; + + for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) { + for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); + sgp = __sg_next(sgp)) { + bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); + length1 += len; + n++; + } + } + + for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) { + for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); + sgp = __sg_next(sgp)) { + bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); + length2 += len; + n++; + } + } + + config = 1; + result = length2 - length1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index d8850bc6a9f1..d1d304c980f0 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -12,6 +12,7 @@ _Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND"); enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC; __u32 g_line = 0; +int page_size = 0; /* userspace should set it */ #define VERIFY_TYPE(type, func) ({ \ g_map_type = type; \ @@ -635,7 +636,6 @@ struct bpf_ringbuf_map { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 12); } m_ringbuf SEC(".maps"); static inline int check_ringbuf(void) @@ -643,7 +643,7 @@ static inline int check_ringbuf(void) struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf; struct bpf_map *map = (struct bpf_map *)&m_ringbuf; - VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12)); + VERIFY(check(&ringbuf->map, map, 0, 0, page_size)); return 1; } diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c index cf6823f42e80..7f2eaa2f89f8 100644 --- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c +++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c @@ -4,7 +4,6 @@ #include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> -#define NULL 0 #define INLINE __always_inline #define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end) diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index fdb4bf4408fa..eeaf6e75c9a2 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -8,18 +8,6 @@ int _version SEC("version") = 1; SEC("sk_msg1") int bpf_prog1(struct sk_msg_md *msg) { - void *data_end = (void *)(long) msg->data_end; - void *data = (void *)(long) msg->data; - - char *d; - - if (data + 8 > data_end) - return SK_DROP; - - bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data); - d = (char *)data; - bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]); - return SK_PASS; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index d3597f81e6e9..8acdb99b5959 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -6,11 +6,8 @@ #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; -#ifndef PAGE_SIZE -#define PAGE_SIZE 4096 -#endif +int page_size = 0; /* userspace should set it */ #ifndef SOL_TCP #define SOL_TCP IPPROTO_TCP @@ -90,7 +87,7 @@ int _getsockopt(struct bpf_sockopt *ctx) * program can only see the first PAGE_SIZE * bytes of data. */ - if (optval_end - optval != PAGE_SIZE) + if (optval_end - optval != page_size) return 0; /* EPERM, unexpected data size */ return 1; @@ -161,7 +158,7 @@ int _setsockopt(struct bpf_sockopt *ctx) if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { /* Original optlen is larger than PAGE_SIZE. */ - if (ctx->optlen != PAGE_SIZE * 2) + if (ctx->optlen != page_size * 2) return 0; /* EPERM, unexpected data size */ if (optval + 1 > optval_end) @@ -175,7 +172,7 @@ int _setsockopt(struct bpf_sockopt *ctx) * program can only see the first PAGE_SIZE * bytes of data. */ - if (optval_end - optval != PAGE_SIZE) + if (optval_end - optval != page_size) return 0; /* EPERM, unexpected data size */ return 1; diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c new file mode 100644 index 000000000000..80a0a20db88d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_local_storage.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} enter_id SEC(".maps"); + +#define MAGIC_VALUE 0xabcd1234 + +pid_t target_pid = 0; +int mismatch_cnt = 0; +int enter_cnt = 0; +int exit_cnt = 0; + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + ptr = bpf_task_storage_get(&enter_id, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + __sync_fetch_and_add(&enter_cnt, 1); + *ptr = MAGIC_VALUE + enter_cnt; + + return 0; +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + ptr = bpf_task_storage_get(&enter_id, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + __sync_fetch_and_add(&exit_cnt, 1); + if (*ptr != MAGIC_VALUE + exit_cnt) + __sync_fetch_and_add(&mismatch_cnt, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c new file mode 100644 index 000000000000..81758c0aef99 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, __u64); +} task_storage SEC(".maps"); + +int valid_ptr_count = 0; +int null_ptr_count = 0; + +SEC("fentry/exit_creds") +int BPF_PROG(trace_exit_creds, struct task_struct *task) +{ + __u64 *ptr; + + ptr = bpf_task_storage_get(&task_storage, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + __sync_fetch_and_add(&valid_ptr_count, 1); + else + __sync_fetch_and_add(&null_ptr_count, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c new file mode 100644 index 000000000000..564583dca7c8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +SEC("fentry/bpf_local_storage_lookup") +int BPF_PROG(on_lookup) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + bpf_task_storage_delete(&map_a, task); + bpf_task_storage_delete(&map_b, task); + return 0; +} + +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + long *ptr; + + ptr = bpf_task_storage_get(&map_a, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + ptr = bpf_task_storage_get(&map_b, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + return 0; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + ptr = bpf_task_storage_get(&map_a, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 200; + + ptr = bpf_task_storage_get(&map_b, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 100; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c index b7787b43f9db..c4a9bae96e75 100644 --- a/tools/testing/selftests/bpf/progs/test_check_mtu.c +++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c @@ -105,6 +105,54 @@ int xdp_minus_delta(struct xdp_md *ctx) return retval; } +SEC("xdp") +int xdp_input_len(struct xdp_md *ctx) +{ + int retval = XDP_PASS; /* Expected retval on successful test */ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 data_len = data_end - data; + + /* API allow user give length to check as input via mtu_len param, + * resulting MTU value is still output in mtu_len param after call. + * + * Input len is L3, like MTU and iph->tot_len. + * Remember XDP data_len is L2. + */ + __u32 mtu_len = data_len - ETH_HLEN; + + if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0)) + retval = XDP_ABORTED; + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("xdp") +int xdp_input_len_exceed(struct xdp_md *ctx) +{ + int retval = XDP_ABORTED; /* Fail */ + __u32 ifindex = GLOBAL_USER_IFINDEX; + int err; + + /* API allow user give length to check as input via mtu_len param, + * resulting MTU value is still output in mtu_len param after call. + * + * Input length value is L3 size like MTU. + */ + __u32 mtu_len = GLOBAL_USER_MTU; + + mtu_len += 1; /* Exceed with 1 */ + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0); + if (err == BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = XDP_PASS ; /* Success in exceeding MTU check */ + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + SEC("classifier") int tc_use_helper(struct __sk_buff *ctx) { @@ -196,3 +244,47 @@ int tc_minus_delta(struct __sk_buff *ctx) global_bpf_mtu_xdp = mtu_len; return retval; } + +SEC("classifier") +int tc_input_len(struct __sk_buff *ctx) +{ + int retval = BPF_OK; /* Expected retval on successful test */ + __u32 ifindex = GLOBAL_USER_IFINDEX; + + /* API allow user give length to check as input via mtu_len param, + * resulting MTU value is still output in mtu_len param after call. + * + * Input length value is L3 size. + */ + __u32 mtu_len = GLOBAL_USER_MTU; + + if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0)) + retval = BPF_DROP; + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("classifier") +int tc_input_len_exceed(struct __sk_buff *ctx) +{ + int retval = BPF_DROP; /* Fail */ + __u32 ifindex = GLOBAL_USER_IFINDEX; + int err; + + /* API allow user give length to check as input via mtu_len param, + * resulting MTU value is still output in mtu_len param after call. + * + * Input length value is L3 size like MTU. + */ + __u32 mtu_len = GLOBAL_USER_MTU; + + mtu_len += 1; /* Exceed with 1 */ + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0); + if (err == BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = BPF_OK; /* Success in exceeding MTU check */ + + global_bpf_mtu_xdp = mtu_len; + return retval; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c index d7fb6cfc7891..7b2d576aeea1 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c @@ -21,6 +21,7 @@ struct core_reloc_size_output { int arr_elem_sz; int ptr_sz; int enum_sz; + int float_sz; }; struct core_reloc_size { @@ -30,6 +31,7 @@ struct core_reloc_size { int arr_field[4]; void *ptr_field; enum { VALUE = 123 } enum_field; + float float_field; }; SEC("raw_tracepoint/sys_enter") @@ -45,6 +47,7 @@ int test_core_size(void *ctx) out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]); out->ptr_sz = bpf_core_field_size(in->ptr_field); out->enum_sz = bpf_core_field_size(in->enum_field); + out->float_sz = bpf_core_field_size(in->float_field); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c index 61c2ae92ce41..97b7031d0e22 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func10.c +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -14,7 +14,7 @@ struct Big { __noinline int foo(const struct Big *big) { - if (big == 0) + if (!big) return 0; return bpf_get_prandom_u32() < big->y; diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c index 4eb42cff5fe9..5a5cc19a15bf 100644 --- a/tools/testing/selftests/bpf/progs/test_mmap.c +++ b/tools/testing/selftests/bpf/progs/test_mmap.c @@ -9,7 +9,6 @@ char _license[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 4096); __uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG); __type(key, __u32); __type(value, char); @@ -17,7 +16,6 @@ struct { struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 512 * 4); /* at least 4 pages of data */ __uint(map_flags, BPF_F_MMAPABLE); __type(key, __u32); __type(value, __u64); diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c index 8ba9959b036b..6b3f288b7c63 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -15,7 +15,6 @@ struct sample { struct { __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 12); } ringbuf SEC(".maps"); /* inputs */ diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c index edf3b6953533..197b86546dca 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c @@ -15,7 +15,6 @@ struct sample { struct ringbuf_map { __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 1 << 12); } ringbuf1 SEC(".maps"), ringbuf2 SEC(".maps"); @@ -31,6 +30,17 @@ struct { }, }; +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __array(values, struct ringbuf_map); +} ringbuf_hash SEC(".maps") = { + .values = { + [0] = &ringbuf1, + }, +}; + /* inputs */ int pid = 0; int target_ring = 0; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 1032b292af5b..ac6f7f205e25 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -64,6 +64,10 @@ static const int PROG_DONE = 1; static const __u32 KEY_SERVER_A = SERVER_A; static const __u32 KEY_SERVER_B = SERVER_B; +static const __u16 SRC_PORT = bpf_htons(8008); +static const __u32 SRC_IP4 = IP4(127, 0, 0, 2); +static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002); + static const __u16 DST_PORT = 7007; /* Host byte order */ static const __u32 DST_IP4 = IP4(127, 0, 0, 1); static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001); @@ -398,11 +402,12 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) if (LSW(ctx->protocol, 0) != IPPROTO_TCP) return SK_DROP; - /* Narrow loads from remote_port field. Expect non-0 value. */ - if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 && - LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0) + /* Narrow loads from remote_port field. Expect SRC_PORT. */ + if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) || + LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) || + LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0) return SK_DROP; - if (LSW(ctx->remote_port, 0) == 0) + if (LSW(ctx->remote_port, 0) != SRC_PORT) return SK_DROP; /* Narrow loads from local_port field. Expect DST_PORT. */ @@ -415,11 +420,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) /* Narrow loads from IPv4 fields */ if (v4) { - /* Expect non-0.0.0.0 in remote_ip4 */ - if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 && - LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0) + /* Expect SRC_IP4 in remote_ip4 */ + if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) || + LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) || + LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) || + LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff)) return SK_DROP; - if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0) + if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) || + LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff)) return SK_DROP; /* Expect DST_IP4 in local_ip4 */ @@ -448,20 +456,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) /* Narrow loads from IPv6 fields */ if (!v4) { - /* Expect non-:: IP in remote_ip6 */ - if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 && - LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 && - LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 && - LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 && - LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 && - LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 && - LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 && - LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0) + /* Expect SRC_IP6 in remote_ip6 */ + if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) || + LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) || + LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) || + LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) || + LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) || + LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) || + LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) || + LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) || + LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) || + LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) || + LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) || + LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) || + LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) || + LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) || + LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) || + LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff)) return SK_DROP; - if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 && - LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 && - LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 && - LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0) + if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) || + LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) || + LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) || + LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) || + LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) || + LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) || + LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) || + LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff)) return SK_DROP; /* Expect DST_IP6 in local_ip6 */ if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) || diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c new file mode 100644 index 000000000000..951a0301c553 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_snprintf.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Google LLC. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char num_out[64] = {}; +long num_ret = 0; + +char ip_out[64] = {}; +long ip_ret = 0; + +char sym_out[64] = {}; +long sym_ret = 0; + +char addr_out[64] = {}; +long addr_ret = 0; + +char str_out[64] = {}; +long str_ret = 0; + +char over_out[6] = {}; +long over_ret = 0; + +char pad_out[10] = {}; +long pad_ret = 0; + +char noarg_out[64] = {}; +long noarg_ret = 0; + +long nobuf_ret = 0; + +extern const void schedule __ksym; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + /* Convenient values to pretty-print */ + const __u8 ex_ipv4[] = {127, 0, 0, 1}; + const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + static const char str1[] = "str1"; + static const char longstr[] = "longstr"; + + /* Integer types */ + num_ret = BPF_SNPRINTF(num_out, sizeof(num_out), + "%d %u %x %li %llu %lX", + -8, 9, 150, -424242, 1337, 0xDABBAD00); + /* IP addresses */ + ip_ret = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6", + &ex_ipv4, &ex_ipv6); + /* Symbol lookup formatting */ + sym_ret = BPF_SNPRINTF(sym_out, sizeof(sym_out), "%ps %pS %pB", + &schedule, &schedule, &schedule); + /* Kernel pointers */ + addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p", + 0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55); + /* Strings embedding */ + str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s", + str1, longstr); + /* Overflow */ + over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow"); + /* Padding of fixed width numbers */ + pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4); + /* No args */ + noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case"); + /* No buffer */ + nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c new file mode 100644 index 000000000000..402adaf344f9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Google LLC. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +/* The format string is filled from the userspace such that loading fails */ +static const char fmt[10]; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + unsigned long long arg = 42; + + bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg)); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index a3a366c57ce1..a39eba9f5201 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -29,15 +29,16 @@ struct { } verdict_map SEC(".maps"); static volatile bool test_sockmap; /* toggled by user-space */ +static volatile bool test_ingress; /* toggled by user-space */ SEC("sk_skb/stream_parser") -int prog_skb_parser(struct __sk_buff *skb) +int prog_stream_parser(struct __sk_buff *skb) { return skb->len; } SEC("sk_skb/stream_verdict") -int prog_skb_verdict(struct __sk_buff *skb) +int prog_stream_verdict(struct __sk_buff *skb) { unsigned int *count; __u32 zero = 0; @@ -55,6 +56,27 @@ int prog_skb_verdict(struct __sk_buff *skb) return verdict; } +SEC("sk_skb/skb_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + unsigned int *count; + __u32 zero = 0; + int verdict; + + if (test_sockmap) + verdict = bpf_sk_redirect_map(skb, &sock_map, zero, + test_ingress ? BPF_F_INGRESS : 0); + else + verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, + test_ingress ? BPF_F_INGRESS : 0); + + count = bpf_map_lookup_elem(&verdict_map, &verdict); + if (count) + (*count)++; + + return verdict; +} + SEC("sk_msg") int prog_msg_verdict(struct sk_msg_md *msg) { diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c new file mode 100644 index 000000000000..2d31f66e4f23 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} sock_map SEC(".maps"); + +SEC("sk_skb/skb_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + return SK_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c new file mode 100644 index 000000000000..ea1a6c4c7172 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +/* 8-byte aligned .bss */ +static volatile long static_var1; +static volatile int static_var11; +int var1 = 0; +/* 4-byte aligned .rodata */ +const volatile int rovar1; + +/* same "subprog" name in both files */ +static __noinline int subprog(int x) +{ + /* but different formula */ + return x * 2; +} + +SEC("raw_tp/sys_enter") +int handler1(const void *ctx) +{ + var1 = subprog(rovar1) + static_var1 + static_var11; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; +int VERSION SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c new file mode 100644 index 000000000000..54d8d1ab577c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +/* 4-byte aligned .bss */ +static volatile int static_var2; +static volatile int static_var22; +int var2 = 0; +/* 8-byte aligned .rodata */ +const volatile long rovar2; + +/* same "subprog" name in both files */ +static __noinline int subprog(int x) +{ + /* but different formula */ + return x * 3; +} + +SEC("raw_tp/sys_enter") +int handler2(const void *ctx) +{ + var2 = subprog(rovar2) + static_var2 + static_var22; + + return 0; +} + +/* different name and/or type of the variable doesn't matter */ +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 37bce7a7c394..84cd63259554 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -24,14 +24,29 @@ static const int cfg_port = 8000; static const int cfg_udp_src = 20000; +#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN) + #define UDP_PORT 5555 #define MPLS_OVER_UDP_PORT 6635 #define ETH_OVER_UDP_PORT 7777 +#define VXLAN_UDP_PORT 8472 + +#define EXTPROTO_VXLAN 0x1 + +#define VXLAN_N_VID (1u << 24) +#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8) +#define VXLAN_FLAGS 0x8 +#define VXLAN_VNI 1 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */ static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | MPLS_LS_S_MASK | 0xff); +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +} __attribute__((packed)); + struct gre_hdr { __be16 flags; __be16 protocol; @@ -45,13 +60,13 @@ union l4hdr { struct v4hdr { struct iphdr ip; union l4hdr l4hdr; - __u8 pad[16]; /* enough space for L2 header */ + __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */ } __attribute__((packed)); struct v6hdr { struct ipv6hdr ip; union l4hdr l4hdr; - __u8 pad[16]; /* enough space for L2 header */ + __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */ } __attribute__((packed)); static __always_inline void set_ipv4_csum(struct iphdr *iph) @@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph) iph->check = ~((csum & 0xffff) + (csum >> 16)); } -static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, - __u16 l2_proto) +static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, + __u16 l2_proto, __u16 ext_proto) { __u16 udp_dst = UDP_PORT; struct iphdr iph_inner; struct v4hdr h_outer; struct tcphdr tcph; int olen, l2_len; + __u8 *l2_hdr = NULL; int tcp_off; __u64 flags; @@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, break; case ETH_P_TEB: l2_len = ETH_HLEN; - udp_dst = ETH_OVER_UDP_PORT; + if (ext_proto & EXTPROTO_VXLAN) { + udp_dst = VXLAN_UDP_PORT; + l2_len += sizeof(struct vxlanhdr); + } else + udp_dst = ETH_OVER_UDP_PORT; break; } flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); @@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, } /* add L2 encap (if specified) */ + l2_hdr = (__u8 *)&h_outer + olen; switch (l2_proto) { case ETH_P_MPLS_UC: - *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; + *(__u32 *)l2_hdr = mpls_label; break; case ETH_P_TEB: - if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, - ETH_HLEN)) + flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH; + + if (ext_proto & EXTPROTO_VXLAN) { + struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr; + + vxlan_hdr->vx_flags = VXLAN_FLAGS; + vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8); + + l2_hdr += sizeof(struct vxlanhdr); + } + + if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN)) return TC_ACT_SHOT; + break; } olen += l2_len; @@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, return TC_ACT_OK; } -static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, +static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto) { + return __encap_ipv4(skb, encap_proto, l2_proto, 0); +} + +static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, + __u16 l2_proto, __u16 ext_proto) +{ __u16 udp_dst = UDP_PORT; struct ipv6hdr iph_inner; struct v6hdr h_outer; struct tcphdr tcph; int olen, l2_len; + __u8 *l2_hdr = NULL; __u16 tot_len; __u64 flags; @@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, break; case ETH_P_TEB: l2_len = ETH_HLEN; - udp_dst = ETH_OVER_UDP_PORT; + if (ext_proto & EXTPROTO_VXLAN) { + udp_dst = VXLAN_UDP_PORT; + l2_len += sizeof(struct vxlanhdr); + } else + udp_dst = ETH_OVER_UDP_PORT; break; } flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len); @@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src); h_outer.l4hdr.udp.dest = bpf_htons(udp_dst); tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) + - sizeof(h_outer.l4hdr.udp); + sizeof(h_outer.l4hdr.udp) + l2_len; h_outer.l4hdr.udp.check = 0; h_outer.l4hdr.udp.len = bpf_htons(tot_len); break; @@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, } /* add L2 encap (if specified) */ + l2_hdr = (__u8 *)&h_outer + olen; switch (l2_proto) { case ETH_P_MPLS_UC: - *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label; + *(__u32 *)l2_hdr = mpls_label; break; case ETH_P_TEB: - if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen, - ETH_HLEN)) + flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH; + + if (ext_proto & EXTPROTO_VXLAN) { + struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr; + + vxlan_hdr->vx_flags = VXLAN_FLAGS; + vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8); + + l2_hdr += sizeof(struct vxlanhdr); + } + + if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN)) return TC_ACT_SHOT; break; } @@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, return TC_ACT_OK; } +static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, + __u16 l2_proto) +{ + return __encap_ipv6(skb, encap_proto, l2_proto, 0); +} + SEC("encap_ipip_none") int __encap_ipip_none(struct __sk_buff *skb) { @@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("encap_vxlan_eth") +int __encap_vxlan_eth(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) + return __encap_ipv4(skb, IPPROTO_UDP, + ETH_P_TEB, + EXTPROTO_VXLAN); + else + return TC_ACT_OK; +} + SEC("encap_sit_none") int __encap_sit_none(struct __sk_buff *skb) { @@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("encap_ip6vxlan_eth") +int __encap_ip6vxlan_eth(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return __encap_ipv6(skb, IPPROTO_UDP, + ETH_P_TEB, + EXTPROTO_VXLAN); + else + return TC_ACT_OK; +} + static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) { char buf[sizeof(struct v6hdr)]; @@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) case ETH_OVER_UDP_PORT: olen += ETH_HLEN; break; + case VXLAN_UDP_PORT: + olen += ETH_HLEN + sizeof(struct vxlanhdr); + break; } break; default: diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 9afe947cfae9..e7b673117436 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -396,7 +396,7 @@ int _ip6vxlan_get_tunnel(struct __sk_buff *skb) SEC("geneve_set_tunnel") int _geneve_set_tunnel(struct __sk_buff *skb) { - int ret, ret2; + int ret; struct bpf_tunnel_key key; struct geneve_opt gopt; @@ -508,10 +508,8 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb) } ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } + if (ret < 0) + gopt.opt_class = 0; bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4, gopt.opt_class); diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index 2db3c60e1e61..ac349a5cea7e 100755 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -85,23 +85,6 @@ make_with_tmpdir() { echo } -make_doc_and_clean() { - echo -e "\$PWD: $PWD" - echo -e "command: make -s $* doc >/dev/null" - RST2MAN_OPTS="--exit-status=1" make $J -s $* doc - if [ $? -ne 0 ] ; then - ERROR=1 - printf "FAILURE: Errors or warnings when building documentation\n" - fi - ( - if [ $# -ge 1 ] ; then - cd ${@: -1} - fi - make -s doc-clean - ) - echo -} - echo "Trying to build bpftool" echo -e "... through kbuild\n" @@ -162,7 +145,3 @@ make_and_clean make_with_tmpdir OUTPUT make_with_tmpdir O - -echo -e "Checking documentation build\n" -# From tools/bpf/bpftool -make_doc_and_clean diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 2023725f1962..e2394eea4b7f 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -66,4 +66,7 @@ #define BTF_FUNC_ENC(name, func_proto) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto) +#define BTF_TYPE_FLOAT_ENC(name, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) + #endif /* _TEST_BTF_H */ diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh new file mode 100755 index 000000000000..7eb940a7b2eb --- /dev/null +++ b/tools/testing/selftests/bpf/test_doc_build.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +# Assume script is located under tools/testing/selftests/bpf/. We want to start +# build attempts from the top of kernel repository. +SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0) +SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH) +KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../) +cd $KDIR_ROOT_DIR + +for tgt in docs docs-clean; do + make -s -C $PWD/$SCRIPT_REL_DIR $tgt; +done diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index f7c2fd89d01a..dda52cb649dc 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -130,6 +130,20 @@ extern int test__join_cgroup(const char *path); #define CHECK_ATTR(condition, tag, format...) \ _CHECK(condition, tag, tattr.duration, format) +#define ASSERT_TRUE(actual, name) ({ \ + static int duration = 0; \ + bool ___ok = (actual); \ + CHECK(!___ok, (name), "unexpected %s: got FALSE\n", (name)); \ + ___ok; \ +}) + +#define ASSERT_FALSE(actual, name) ({ \ + static int duration = 0; \ + bool ___ok = !(actual); \ + CHECK(!___ok, (name), "unexpected %s: got TRUE\n", (name)); \ + ___ok; \ +}) + #define ASSERT_EQ(actual, expected, name) ({ \ static int duration = 0; \ typeof(actual) ___act = (actual); \ @@ -152,6 +166,50 @@ extern int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_LT(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act < ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld >= expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + +#define ASSERT_LE(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act <= ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld > expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + +#define ASSERT_GT(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act > ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld <= expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + +#define ASSERT_GE(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act >= ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld < expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + #define ASSERT_STREQ(actual, expected, name) ({ \ static int duration = 0; \ const char *___act = actual; \ @@ -167,7 +225,8 @@ extern int test__join_cgroup(const char *path); static int duration = 0; \ long long ___res = (res); \ bool ___ok = ___res == 0; \ - CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \ + CHECK(!___ok, (name), "unexpected error: %lld (errno %d)\n", \ + ___res, errno); \ ___ok; \ }) @@ -199,7 +258,7 @@ extern int test__join_cgroup(const char *path); #define ASSERT_ERR_PTR(ptr, name) ({ \ static int duration = 0; \ const void *___res = (ptr); \ - bool ___ok = IS_ERR(___res) \ + bool ___ok = IS_ERR(___res); \ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ ___ok; \ }) diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 427ca00a3217..eefd445b96fc 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -732,7 +732,7 @@ static int sendmsg_test(struct sockmap_options *opt) * socket is not a valid test. So in this case lets not * enable kTLS but still run the test. */ - if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) { + if (!txmsg_redir || txmsg_ingress) { err = sockmap_init_ktls(opt->verbose, rx_fd); if (err) return err; diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index 7c76b841b17b..c9dde9b9d987 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -44,8 +44,8 @@ setup() { # clamp route to reserve room for tunnel headers ip -netns "${ns1}" -4 route flush table main ip -netns "${ns1}" -6 route flush table main - ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1 - ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1 + ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1 + ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1 sleep 1 @@ -105,6 +105,12 @@ if [[ "$#" -eq "0" ]]; then echo "sit" $0 ipv6 sit none 100 + echo "ip4 vxlan" + $0 ipv4 vxlan eth 2000 + + echo "ip6 vxlan" + $0 ipv6 ip6vxlan eth 2000 + for mac in none mpls eth ; do echo "ip gre $mac" $0 ipv4 gre $mac 100 @@ -214,6 +220,9 @@ if [[ "$tuntype" =~ "udp" ]]; then targs="encap fou encap-sport auto encap-dport $dport" elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then ttype=$gretaptype +elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then + ttype="vxlan" + targs="id 1 dstport 8472 udp6zerocsumrx" else ttype=$tuntype targs="" @@ -242,7 +251,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then # No support for TEB fou tunnel; expect failure. expect_tun_fail=1 -elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then +elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then # Share ethernet address between tunnel/veth2 so L2 decap works. ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \ awk '/ether/ { print $2 }') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 58b5a349d3ba..1512092e1e68 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -105,7 +105,7 @@ struct bpf_test { enum bpf_prog_type prog_type; uint8_t flags; void (*fill_helper)(struct bpf_test *self); - uint8_t runs; + int runs; #define bpf_testdata_struct_t \ struct { \ uint32_t retval, retval_unpriv; \ @@ -1165,7 +1165,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, run_errs = 0; run_successes = 0; - if (!alignment_prevented_execution && fd_prog >= 0) { + if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) { uint32_t expected_val; int i; diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 88a7483eaae4..46633a3bfb0b 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -71,13 +71,21 @@ # # Run (full output without color-coding): # sudo ./test_xsk.sh +# +# Run with verbose output: +# sudo ./test_xsk.sh -v +# +# Run and dump packet contents: +# sudo ./test_xsk.sh -D . xsk_prereqs.sh -while getopts c flag +while getopts "cvD" flag do case "${flag}" in c) colorconsole=1;; + v) verbose=1;; + D) dump_pkts=1;; esac done @@ -95,17 +103,22 @@ NS1=af_xdp${VETH1_POSTFIX} MTU=1500 setup_vethPairs() { - echo "setting up ${VETH0}: namespace: ${NS0}" + if [[ $verbose -eq 1 ]]; then + echo "setting up ${VETH0}: namespace: ${NS0}" + fi ip netns add ${NS1} - ip link add ${VETH0} type veth peer name ${VETH1} + ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4 if [ -f /proc/net/if_inet6 ]; then echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6 fi - echo "setting up ${VETH1}: namespace: ${NS1}" + if [[ $verbose -eq 1 ]]; then + echo "setting up ${VETH1}: namespace: ${NS1}" + fi ip link set ${VETH1} netns ${NS1} ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU} ip link set ${VETH0} mtu ${MTU} ip netns exec ${NS1} ip link set ${VETH1} up + ip netns exec ${NS1} ip link set dev lo up ip link set ${VETH0} up } @@ -125,121 +138,24 @@ echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE} validate_veth_spec_file -echo "Spec file created: ${SPECFILE}" - -test_status $retval "${TEST_NAME}" - -## START TESTS - -statusList=() - -### TEST 1 -TEST_NAME="XSK KSELFTEST FRAMEWORK" - -echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode" -vethXDPgeneric ${VETH0} ${VETH1} ${NS1} - -retval=$? -if [ $retval -eq 0 ]; then - echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode" - vethXDPnative ${VETH0} ${VETH1} ${NS1} +if [[ $verbose -eq 1 ]]; then + echo "Spec file created: ${SPECFILE}" + VERBOSE_ARG="-v" fi -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) - -### TEST 2 -TEST_NAME="SKB NOPOLL" - -vethXDPgeneric ${VETH0} ${VETH1} ${NS1} - -params=("-S") -execxdpxceiver params - -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) - -### TEST 3 -TEST_NAME="SKB POLL" - -vethXDPgeneric ${VETH0} ${VETH1} ${NS1} - -params=("-S" "-p") -execxdpxceiver params - -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) - -### TEST 4 -TEST_NAME="DRV NOPOLL" - -vethXDPnative ${VETH0} ${VETH1} ${NS1} - -params=("-N") -execxdpxceiver params - -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) - -### TEST 5 -TEST_NAME="DRV POLL" - -vethXDPnative ${VETH0} ${VETH1} ${NS1} - -params=("-N" "-p") -execxdpxceiver params - -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) - -### TEST 6 -TEST_NAME="SKB SOCKET TEARDOWN" - -vethXDPgeneric ${VETH0} ${VETH1} ${NS1} - -params=("-S" "-T") -execxdpxceiver params - -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) - -### TEST 7 -TEST_NAME="DRV SOCKET TEARDOWN" - -vethXDPnative ${VETH0} ${VETH1} ${NS1} - -params=("-N" "-T") -execxdpxceiver params +if [[ $dump_pkts -eq 1 ]]; then + DUMP_PKTS_ARG="-D" +fi -retval=$? test_status $retval "${TEST_NAME}" -statusList+=($retval) -### TEST 8 -TEST_NAME="SKB BIDIRECTIONAL SOCKETS" - -vethXDPgeneric ${VETH0} ${VETH1} ${NS1} - -params=("-S" "-B") -execxdpxceiver params - -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) +## START TESTS -### TEST 9 -TEST_NAME="DRV BIDIRECTIONAL SOCKETS" +statusList=() -vethXDPnative ${VETH0} ${VETH1} ${NS1} +TEST_NAME="XSK KSELFTESTS" -params=("-N" "-B") -execxdpxceiver params +execxdpxceiver retval=$? test_status $retval "${TEST_NAME}" diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 1b138cd2b187..1b1c798e9248 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -186,7 +186,7 @@ }, .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", - .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .errstr = "R0 unbounded memory access", .result_unpriv = REJECT, .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 57ed67b86074..8a1caf46ffbc 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -261,8 +261,6 @@ }, .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root", - .result_unpriv = REJECT, .errstr = "value -4294967168 makes map_value pointer be out of bounds", .result = REJECT, }, @@ -298,9 +296,6 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - /* not actually fully unbounded, but the bound is very high */ - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root", - .result_unpriv = REJECT, .errstr = "value -4294967168 makes map_value pointer be out of bounds", .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c index 1fd07a4f27ac..91869aea6d64 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c +++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c @@ -6,8 +6,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 2", @@ -20,6 +21,8 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 1, }, @@ -31,20 +34,24 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 4", .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R6 has pointer with unsupported alu operation", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -55,8 +62,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 6", @@ -67,8 +75,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 7", @@ -80,8 +89,9 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "dereference of modified ctx ptr", + .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { @@ -94,8 +104,9 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "dereference of modified ctx ptr", + .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { @@ -106,8 +117,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 10", @@ -119,6 +131,6 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, .errstr = "math between ctx pointer and register with unbounded min value is not allowed", + .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c index 9baca7a75c42..c2aa6f26738b 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c +++ b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c @@ -19,7 +19,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -43,7 +42,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -69,7 +67,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -94,7 +91,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -141,7 +137,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -210,7 +205,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -260,7 +254,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -287,7 +280,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -313,7 +305,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -342,7 +333,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -372,7 +362,6 @@ }, .fixup_map_hash_8b = { 4 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -400,7 +389,5 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, - .result_unpriv = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c index 69b048cf46d9..3e024c891178 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c +++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c @@ -42,3 +42,46 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "bpf_get_task_stack return R0 range is refined", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_6, 0), // ctx->meta->seq + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, 8), // ctx->task + BPF_LD_MAP_FD(BPF_REG_1, 0), // fixup_map_array_48b + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), // keep buf for seq_write + BPF_MOV64_IMM(BPF_REG_3, 48), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_get_task_stack), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_seq_write), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_ITER, + .kfunc = "task", + .runs = -1, // Don't run, just load + .fixup_map_array_48b = { 3 }, +}, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index eb888c8479c3..336a749673d1 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -19,7 +19,7 @@ BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 1, @@ -136,7 +136,7 @@ { "calls: wrong src reg", .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 3, 0, 0), BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, @@ -397,7 +397,7 @@ BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .fixup_map_hash_48b = { 3 }, .result_unpriv = REJECT, .result = ACCEPT, @@ -1977,7 +1977,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, }, @@ -2003,7 +2003,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .errstr = "!read_ok", .result = REJECT, }, @@ -2028,7 +2028,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .errstr = "!read_ok", .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c index fb13ca2d5606..d78627be060f 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c @@ -239,6 +239,7 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SK_LOOKUP, .expected_attach_type = BPF_SK_LOOKUP, + .runs = -1, }, /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */ { diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index 5cf361d8eb1c..17fe33a75034 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -85,7 +85,7 @@ BPF_MOV64_IMM(BPF_REG_0, 12), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, @@ -103,7 +103,7 @@ BPF_MOV64_IMM(BPF_REG_0, 12), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, @@ -121,7 +121,7 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, @@ -137,7 +137,7 @@ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, @@ -152,7 +152,7 @@ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "function calls to other bpf functions are allowed for", + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index b117bdd3806d..1f82021429bf 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -75,6 +75,8 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_16b = { 4 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .result = ACCEPT, }, { @@ -91,5 +93,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_16b = { 4 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 has pointer with unsupported alu operation", .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index b018ad71e0a8..bd436df5cc32 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -497,7 +497,7 @@ .result = ACCEPT, }, { - "unpriv: adding of fp", + "unpriv: adding of fp, reg", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_1, 0), @@ -510,6 +510,19 @@ .result = ACCEPT, }, { + "unpriv: adding of fp, imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ "unpriv: cmp of stack pointer", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index ed4e76b24649..e5913fd3b903 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -21,8 +21,6 @@ .fixup_map_hash_16b = { 5 }, .fixup_map_array_48b = { 8 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 tried to add from different maps", .retval = 1, }, { @@ -122,7 +120,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different pointers or scalars", + .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", .retval = 0, }, { @@ -169,7 +167,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps or paths", + .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", .retval = 0, }, { @@ -517,6 +515,27 @@ .retval = 0xabcdef12, }, { + "map access: value_ptr += N, value_ptr -= N known scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV32_IMM(BPF_REG_1, 0x12345678), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0x12345678, +}, +{ "map access: unknown scalar += value_ptr, 1", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 26ae8d0b6ce3..8889b3f55236 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -17,19 +17,22 @@ KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vm KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config" INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX" NUM_COMPILE_JOBS="$(nproc)" +LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" +LOG_FILE="${LOG_FILE_BASE}.log" +EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status" usage() { cat <<EOF -Usage: $0 [-i] [-d <output_dir>] -- [<command>] +Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>] <command> is the command you would normally run when you are in tools/testing/selftests/bpf. e.g: $0 -- ./test_progs -t test_lsm -If no command is specified, "${DEFAULT_COMMAND}" will be run by -default. +If no command is specified and a debug shell (-s) is not requested, +"${DEFAULT_COMMAND}" will be run by default. If you build your kernel using KBUILD_OUTPUT= or O= options, these can be passed as environment variables to the script: @@ -46,6 +49,9 @@ Options: -d) Update the output directory (default: ${OUTPUT_DIR}) -j) Number of jobs for compilation, similar to -j in make (default: ${NUM_COMPILE_JOBS}) + -s) Instead of powering off the VM, start an interactive + shell. If <command> is specified, the shell runs after + the command finishes executing EOF } @@ -146,7 +152,7 @@ update_init_script() local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d" local init_script="${init_script_dir}/S50-startup" local command="$1" - local log_file="$2" + local exit_command="$2" mount_image @@ -160,17 +166,26 @@ EOF fi - sudo bash -c "cat >${init_script}" <<EOF -#!/bin/bash + sudo bash -c "echo '#!/bin/bash' > ${init_script}" + + if [[ "${command}" != "" ]]; then + sudo bash -c "cat >>${init_script}" <<EOF +# Have a default value in the exit status file +# incase the VM is forcefully stopped. +echo "130" > "/root/${EXIT_STATUS_FILE}" { cd /root/bpf echo ${command} stdbuf -oL -eL ${command} -} 2>&1 | tee /root/${log_file} -poweroff -f + echo "\$?" > "/root/${EXIT_STATUS_FILE}" +} 2>&1 | tee "/root/${LOG_FILE}" +# Ensure that the logs are written to disk +sync EOF + fi + sudo bash -c "echo ${exit_command} >> ${init_script}" sudo chmod a+x "${init_script}" unmount_image } @@ -221,10 +236,12 @@ EOF copy_logs() { local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}" - local log_file="${mount_dir}/root/$1" + local log_file="${mount_dir}/root/${LOG_FILE}" + local exit_status_file="${mount_dir}/root/${EXIT_STATUS_FILE}" mount_image sudo cp ${log_file} "${OUTPUT_DIR}" + sudo cp ${exit_status_file} "${OUTPUT_DIR}" sudo rm -f ${log_file} unmount_image } @@ -263,14 +280,15 @@ main() { local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" local kernel_checkout=$(realpath "${script_dir}"/../../../../) - local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")" # By default the script searches for the kernel in the checkout directory but # it also obeys environment variables O= and KBUILD_OUTPUT= local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}" local command="${DEFAULT_COMMAND}" local update_image="no" + local exit_command="poweroff -f" + local debug_shell="no" - while getopts 'hkid:j:' opt; do + while getopts 'hskid:j:' opt; do case ${opt} in i) update_image="yes" @@ -281,6 +299,11 @@ main() j) NUM_COMPILE_JOBS="$OPTARG" ;; + s) + command="" + debug_shell="yes" + exit_command="bash" + ;; h) usage exit 0 @@ -299,7 +322,7 @@ main() done shift $((OPTIND -1)) - if [[ $# -eq 0 ]]; then + if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" else command="$@" @@ -347,19 +370,25 @@ main() fi update_selftests "${kernel_checkout}" "${make_command}" - update_init_script "${command}" "${log_file}" + update_init_script "${command}" "${exit_command}" run_vm "${kernel_bzimage}" - copy_logs "${log_file}" - echo "Logs saved in ${OUTPUT_DIR}/${log_file}" + if [[ "${command}" != "" ]]; then + copy_logs + echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" + fi } catch() { local exit_code=$1 + local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}" # This is just a cleanup and the directory may # have already been unmounted. So, don't let this # clobber the error code we intend to return. unmount_image || true + if [[ -f "${exit_status_file}" ]]; then + exit_code="$(cat ${exit_status_file})" + fi exit ${exit_code} } diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index f4a96d5ff524..1135fb980814 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -18,12 +18,7 @@ * These selftests test AF_XDP SKB and Native/DRV modes using veth * Virtual Ethernet interfaces. * - * The following tests are run: - * - * 1. AF_XDP SKB mode - * Generic mode XDP is driver independent, used when the driver does - * not have support for XDP. Works on any netdevice using sockets and - * generic XDP path. XDP hook from netif_receive_skb(). + * For each mode, the following tests are run: * a. nopoll - soft-irq processing * b. poll - using poll() syscall * c. Socket Teardown @@ -33,19 +28,25 @@ * Configure sockets as bi-directional tx/rx sockets, sets up fill and * completion rings on each socket, tx/rx in both directions. Only nopoll * mode is used + * e. Statistics + * Trigger some error conditions and ensure that the appropriate statistics + * are incremented. Within this test, the following statistics are tested: + * i. rx dropped + * Increase the UMEM frame headroom to a value which results in + * insufficient space in the rx buffer for both the packet and the headroom. + * ii. tx invalid + * Set the 'len' field of tx descriptors to an invalid value (umem frame + * size + 1). + * iii. rx ring full + * Reduce the size of the RX ring to a fraction of the fill ring size. + * iv. fill queue empty + * Do not populate the fill queue and then try to receive pkts. + * f. bpf_link resource persistence + * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0, + * then remove xsk sockets from queue 0 on both veth interfaces and + * finally run a traffic on queues ids 1 * - * 2. AF_XDP DRV/Native mode - * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes - * packets before SKB allocation. Provides better performance than SKB. Driver - * hook available just after DMA of buffer descriptor. - * a. nopoll - * b. poll - * c. Socket Teardown - * d. Bi-directional sockets - * - Only copy mode is supported because veth does not currently support - * zero-copy mode - * - * Total tests: 8 + * Total tests: 12 * * Flow: * ----- @@ -58,7 +59,7 @@ * - Rx thread verifies if all 10k packets were received and delivered in-order, * and have the right content * - * Enable/disable debug mode: + * Enable/disable packet dump mode: * -------------------------- * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D") @@ -96,35 +97,34 @@ typedef __u16 __sum16; #include "xdpxceiver.h" #include "../kselftest.h" +static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; +static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; +static const char *IP1 = "192.168.100.162"; +static const char *IP2 = "192.168.100.161"; +static const u16 UDP_PORT1 = 2020; +static const u16 UDP_PORT2 = 2121; + static void __exit_with_error(int error, const char *file, const char *func, int line) { - ksft_test_result_fail - ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error)); - ksft_exit_xfail(); + if (configured_mode == TEST_MODE_UNCONFIGURED) { + ksft_exit_fail_msg + ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error)); + } else { + ksft_test_result_fail + ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error)); + ksft_exit_xfail(); + } } #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define print_ksft_result(void)\ - (ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\ - "NOPOLL", opt_teardown ? "Socket Teardown" : "",\ - opt_bidi ? "Bi-directional Sockets" : "")) - -static void pthread_init_mutex(void) -{ - pthread_mutex_init(&sync_mutex, NULL); - pthread_mutex_init(&sync_mutex_tx, NULL); - pthread_cond_init(&signal_rx_condition, NULL); - pthread_cond_init(&signal_tx_condition, NULL); -} - -static void pthread_destroy_mutex(void) -{ - pthread_mutex_destroy(&sync_mutex); - pthread_mutex_destroy(&sync_mutex_tx); - pthread_cond_destroy(&signal_rx_condition); - pthread_cond_destroy(&signal_tx_condition); -} + (ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\ + test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\ + test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\ + test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\ + test_type == TEST_TYPE_STATS ? "Stats" : "",\ + test_type == TEST_TYPE_BPF_RES ? "BPF RES" : "")) static void *memset32_htonl(void *dest, u32 val, u32 size) { @@ -143,24 +143,11 @@ static void *memset32_htonl(void *dest, u32 val, u32 size) } /* - * This function code has been taken from - * Linux kernel lib/checksum.c - */ -static inline unsigned short from32to16(unsigned int x) -{ - /* add up 16-bit and 16-bit for 16+c bit */ - x = (x & 0xffff) + (x >> 16); - /* add up carry.. */ - x = (x & 0xffff) + (x >> 16); - return x; -} - -/* * Fold a partial checksum * This function code has been taken from * Linux kernel include/asm-generic/checksum.h */ -static inline __u16 csum_fold(__u32 csum) +static __u16 csum_fold(__u32 csum) { u32 sum = (__force u32)csum; @@ -173,7 +160,7 @@ static inline __u16 csum_fold(__u32 csum) * This function code has been taken from * Linux kernel lib/checksum.c */ -static inline u32 from64to32(u64 x) +static u32 from64to32(u64 x) { /* add up 32-bit and 32-bit for 32+c bit */ x = (x & 0xffffffff) + (x >> 32); @@ -182,13 +169,11 @@ static inline u32 from64to32(u64 x) return (u32)x; } -__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum); - /* * This function code has been taken from * Linux kernel lib/checksum.c */ -__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) { unsigned long long s = (__force u32)sum; @@ -206,13 +191,12 @@ __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u3 * This function has been taken from * Linux kernel include/asm-generic/checksum.h */ -static inline __u16 -csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } -static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) +static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) { u32 csum = 0; u32 cnt = 0; @@ -267,26 +251,37 @@ static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE); } -static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size) +static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx) { + struct xsk_umem_config cfg = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, + .frame_headroom = frame_headroom, + .flags = XSK_UMEM__DEFAULT_FLAGS + }; + int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE; + struct xsk_umem_info *umem; int ret; - data->umem = calloc(1, sizeof(struct xsk_umem_info)); - if (!data->umem) + umem = calloc(1, sizeof(struct xsk_umem_info)); + if (!umem) exit_with_error(errno); - ret = xsk_umem__create(&data->umem->umem, buffer, size, - &data->umem->fq, &data->umem->cq, NULL); + ret = xsk_umem__create(&umem->umem, buffer, size, + &umem->fq, &umem->cq, &cfg); if (ret) exit_with_error(ret); - data->umem->buffer = buffer; + umem->buffer = buffer; + + data->umem_arr[idx] = umem; } static void xsk_populate_fill_ring(struct xsk_umem_info *umem) { int ret, i; - u32 idx; + u32 idx = 0; ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) @@ -296,51 +291,48 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem) xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); } -static int xsk_configure_socket(struct ifobject *ifobject) +static int xsk_configure_socket(struct ifobject *ifobject, int idx) { struct xsk_socket_config cfg; + struct xsk_socket_info *xsk; struct xsk_ring_cons *rxr; struct xsk_ring_prod *txr; int ret; - ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info)); - if (!ifobject->xsk) + xsk = calloc(1, sizeof(struct xsk_socket_info)); + if (!xsk) exit_with_error(errno); - ifobject->xsk->umem = ifobject->umem; - cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + xsk->umem = ifobject->umem; + cfg.rx_size = rxqsize; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; cfg.libbpf_flags = 0; - cfg.xdp_flags = opt_xdp_flags; - cfg.bind_flags = opt_xdp_bind_flags; + cfg.xdp_flags = xdp_flags; + cfg.bind_flags = xdp_bind_flags; - if (!opt_bidi) { - rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; - txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL; + if (test_type != TEST_TYPE_BIDI) { + rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL; + txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL; } else { - rxr = &ifobject->xsk->rx; - txr = &ifobject->xsk->tx; + rxr = &xsk->rx; + txr = &xsk->tx; } - ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname, - opt_queue, ifobject->umem->umem, rxr, txr, &cfg); - + ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx, + ifobject->umem->umem, rxr, txr, &cfg); if (ret) return 1; + ifobject->xsk_arr[idx] = xsk; + return 0; } static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"queue", optional_argument, 0, 'q'}, - {"poll", no_argument, 0, 'p'}, - {"xdp-skb", no_argument, 0, 'S'}, - {"xdp-native", no_argument, 0, 'N'}, - {"copy", no_argument, 0, 'c'}, - {"tear-down", no_argument, 0, 'T'}, - {"bidi", optional_argument, 0, 'B'}, - {"debug", optional_argument, 0, 'D'}, + {"dump-pkts", optional_argument, 0, 'D'}, + {"verbose", no_argument, 0, 'v'}, {"tx-pkt-count", optional_argument, 0, 'C'}, {0, 0, 0, 0} }; @@ -352,23 +344,21 @@ static void usage(const char *prog) " Options:\n" " -i, --interface Use interface\n" " -q, --queue=n Use queue n (default 0)\n" - " -p, --poll Use poll syscall\n" - " -S, --xdp-skb=n Use XDP SKB mode\n" - " -N, --xdp-native=n Enforce XDP DRV (native) mode\n" - " -c, --copy Force copy mode\n" - " -T, --tear-down Tear down sockets by repeatedly recreating them\n" - " -B, --bidi Bi-directional sockets test\n" - " -D, --debug Debug mode - dump packets L2 - L5\n" + " -D, --dump-pkts Dump packets L2 - L5\n" + " -v, --verbose Verbose output\n" " -C, --tx-pkt-count=n Number of packets to send\n"; ksft_print_msg(str, prog); } -static bool switch_namespace(int idx) +static int switch_namespace(const char *nsname) { char fqns[26] = "/var/run/netns/"; int nsfd; - strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1); + if (!nsname || strlen(nsname) == 0) + return -1; + + strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1); nsfd = open(fqns, O_RDONLY); if (nsfd == -1) @@ -377,26 +367,9 @@ static bool switch_namespace(int idx) if (setns(nsfd, 0) == -1) exit_with_error(errno); - return true; -} + print_verbose("NS switched: %s\n", nsname); -static void *nsswitchthread(void *args) -{ - struct targs *targs = args; - - targs->retptr = false; - - if (switch_namespace(targs->idx)) { - ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname); - if (!ifdict[targs->idx]->ifindex) { - ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n", - __func__, ifdict[targs->idx]->ifname); - } else { - ksft_print_msg("Interface found: %s\n", ifdict[targs->idx]->ifname); - targs->retptr = true; - } - } - pthread_exit(NULL); + return nsfd; } static int validate_interfaces(void) @@ -408,33 +381,6 @@ static int validate_interfaces(void) ret = false; ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>."); } - if (strcmp(ifdict[i]->nsname, "")) { - struct targs *targs; - - targs = malloc(sizeof(*targs)); - if (!targs) - exit_with_error(errno); - - targs->idx = i; - if (pthread_create(&ns_thread, NULL, nsswitchthread, targs)) - exit_with_error(errno); - - pthread_join(ns_thread, NULL); - - if (targs->retptr) - ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname); - - free(targs); - } else { - ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname); - if (!ifdict[i]->ifindex) { - ksft_test_result_fail - ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname); - ret = false; - } else { - ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname); - } - } } return ret; } @@ -446,7 +392,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index); + c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index); if (c == -1) break; @@ -466,43 +412,26 @@ static void parse_command_line(int argc, char **argv) MAX_INTERFACES_NAMESPACE_CHARS); interface_index++; break; - case 'q': - opt_queue = atoi(optarg); - break; - case 'p': - opt_poll = 1; - break; - case 'S': - opt_xdp_flags |= XDP_FLAGS_SKB_MODE; - opt_xdp_bind_flags |= XDP_COPY; - uut = ORDER_CONTENT_VALIDATE_XDP_SKB; - break; - case 'N': - opt_xdp_flags |= XDP_FLAGS_DRV_MODE; - opt_xdp_bind_flags |= XDP_COPY; - uut = ORDER_CONTENT_VALIDATE_XDP_DRV; - break; - case 'c': - opt_xdp_bind_flags |= XDP_COPY; - break; - case 'T': - opt_teardown = 1; - break; - case 'B': - opt_bidi = 1; - break; case 'D': debug_pkt_dump = 1; break; case 'C': opt_pkt_count = atoi(optarg); break; + case 'v': + opt_verbose = 1; + break; default: usage(basename(argv[0])); ksft_exit_xfail(); } } + if (!opt_pkt_count) { + print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT); + opt_pkt_count = DEFAULT_PKT_CNT; + } + if (!validate_interfaces()) { usage(basename(argv[0])); ksft_exit_xfail(); @@ -519,7 +448,7 @@ static void kick_tx(struct xsk_socket_info *xsk) exit_with_error(errno); } -static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) +static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) { unsigned int rcvd; u32 idx; @@ -527,7 +456,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) if (!xsk->outstanding_tx) return; - if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx)) + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); @@ -597,8 +526,10 @@ static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds) static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size) { - u32 idx; + u32 idx = 0; unsigned int i; + bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID; + u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE; while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) complete_tx_only(xsk, batch_size); @@ -607,17 +538,21 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size) struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; - tx_desc->len = PKT_SIZE; + tx_desc->len = len; } xsk_ring_prod__submit(&xsk->tx, batch_size); - xsk->outstanding_tx += batch_size; + if (!tx_invalid_test) { + xsk->outstanding_tx += batch_size; + } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { + kick_tx(xsk); + } *frameptr += batch_size; *frameptr %= num_frames; complete_tx_only(xsk, batch_size); } -static inline int get_batch_size(int pkt_cnt) +static int get_batch_size(int pkt_cnt) { if (!opt_pkt_count) return BATCH_SIZE; @@ -654,7 +589,7 @@ static void tx_only_all(struct ifobject *ifobject) while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { int batch_size = get_batch_size(pkt_cnt); - if (opt_poll) { + if (test_type == TEST_TYPE_POLL) { ret = poll(fds, 1, POLL_TMOUT); if (ret <= 0) continue; @@ -673,48 +608,43 @@ static void tx_only_all(struct ifobject *ifobject) static void worker_pkt_dump(void) { - struct in_addr ipaddr; + struct ethhdr *ethhdr; + struct iphdr *iphdr; + struct udphdr *udphdr; + char s[128]; + int payload; + void *ptr; fprintf(stdout, "---------------------------------------\n"); for (int iter = 0; iter < num_frames - 1; iter++) { + ptr = pkt_buf[iter]->payload; + ethhdr = ptr; + iphdr = ptr + sizeof(*ethhdr); + udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr); + /*extract L2 frame */ fprintf(stdout, "DEBUG>> L2: dst mac: "); for (int i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ((struct ethhdr *) - pkt_buf[iter]->payload)->h_dest[i]); + fprintf(stdout, "%02X", ethhdr->h_dest[i]); fprintf(stdout, "\nDEBUG>> L2: src mac: "); for (int i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ((struct ethhdr *) - pkt_buf[iter]->payload)->h_source[i]); + fprintf(stdout, "%02X", ethhdr->h_source[i]); /*extract L3 frame */ - fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", - ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl); - - ipaddr.s_addr = - ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr; - fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr)); - - ipaddr.s_addr = - ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr; - fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr)); - + fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); + fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", + inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); + fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", + inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); /*extract L4 frame */ - fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", - ntohs(((struct udphdr *)(pkt_buf[iter]->payload + - sizeof(struct ethhdr) + - sizeof(struct iphdr)))->source)); - - fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", - ntohs(((struct udphdr *)(pkt_buf[iter]->payload + - sizeof(struct ethhdr) + - sizeof(struct iphdr)))->dest)); + fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); + fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); /*extract L5 frame */ - int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); + payload = *((uint32_t *)(ptr + PKT_HDR_SIZE)); if (payload == EOT) { - ksft_print_msg("End-of-transmission frame received\n"); + print_verbose("End-of-transmission frame received\n"); fprintf(stdout, "---------------------------------------\n"); break; } @@ -723,6 +653,48 @@ static void worker_pkt_dump(void) } } +static void worker_stats_validate(struct ifobject *ifobject) +{ + struct xdp_statistics stats; + socklen_t optlen; + int err; + struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ? + ifdict[!ifobject->ifdict_index]->xsk->xsk : + ifobject->xsk->xsk; + int fd = xsk_socket__fd(xsk); + unsigned long xsk_stat = 0, expected_stat = opt_pkt_count; + + sigvar = 0; + + optlen = sizeof(stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); + if (err) + return; + + if (optlen == sizeof(struct xdp_statistics)) { + switch (stat_test_type) { + case STAT_TEST_RX_DROPPED: + xsk_stat = stats.rx_dropped; + break; + case STAT_TEST_TX_INVALID: + xsk_stat = stats.tx_invalid_descs; + break; + case STAT_TEST_RX_FULL: + xsk_stat = stats.rx_ring_full; + expected_stat -= RX_FULL_RXQSIZE; + break; + case STAT_TEST_RX_FILL_EMPTY: + xsk_stat = stats.rx_fill_ring_empty_descs; + break; + default: + break; + } + + if (xsk_stat == expected_stat) + sigvar = 1; + } +} + static void worker_pkt_validate(void) { u32 payloadseqnum = -2; @@ -746,7 +718,7 @@ static void worker_pkt_validate(void) } if (payloadseqnum == EOT) { - ksft_print_msg("End-of-transmission frame received: PASS\n"); + print_verbose("End-of-transmission frame received: PASS\n"); sigvar = 1; break; } @@ -773,37 +745,69 @@ static void worker_pkt_validate(void) } } -static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr, - atomic_int *spinningptr) +static void thread_common_ops(struct ifobject *ifobject, void *bufs) { + int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE; int ctr = 0; int ret; - xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); - ret = xsk_configure_socket(ifobject); + ifobject->ns_fd = switch_namespace(ifobject->nsname); + + if (test_type == TEST_TYPE_BPF_RES) + umem_sz *= 2; + + bufs = mmap(NULL, umem_sz, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); + + xsk_configure_umem(ifobject, bufs, 0); + ifobject->umem = ifobject->umem_arr[0]; + ret = xsk_configure_socket(ifobject, 0); /* Retry Create Socket if it fails as xsk_socket__create() * is asynchronous - * - * Essential to lock Mutex here to prevent Tx thread from - * entering before Rx and causing a deadlock */ - pthread_mutex_lock(mutexptr); while (ret && ctr < SOCK_RECONF_CTR) { - atomic_store(spinningptr, 1); - xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); - ret = xsk_configure_socket(ifobject); + xsk_configure_umem(ifobject, bufs, 0); + ifobject->umem = ifobject->umem_arr[0]; + ret = xsk_configure_socket(ifobject, 0); usleep(USLEEP_MAX); ctr++; } - atomic_store(spinningptr, 0); - pthread_mutex_unlock(mutexptr); if (ctr >= SOCK_RECONF_CTR) exit_with_error(ret); + + ifobject->umem = ifobject->umem_arr[0]; + ifobject->xsk = ifobject->xsk_arr[0]; + + if (test_type == TEST_TYPE_BPF_RES) { + xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1); + ifobject->umem = ifobject->umem_arr[1]; + ret = xsk_configure_socket(ifobject, 1); + } + + ifobject->umem = ifobject->umem_arr[0]; + ifobject->xsk = ifobject->xsk_arr[0]; + print_verbose("Interface [%s] vector [%s]\n", + ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx"); +} + +static bool testapp_is_test_two_stepped(void) +{ + return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step; +} + +static void testapp_cleanup_xsk_res(struct ifobject *ifobj) +{ + if (testapp_is_test_two_stepped()) { + xsk_socket__delete(ifobj->xsk->xsk); + (void)xsk_umem__delete(ifobj->umem->umem); + } } -static void *worker_testapp_validate(void *arg) +static void *worker_testapp_validate_tx(void *arg) { struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr)); @@ -813,149 +817,97 @@ static void *worker_testapp_validate(void *arg) struct generic_data data; void *bufs = NULL; - pthread_attr_setstacksize(&attr, THREAD_STACK); - - if (!bidi_pass) { - bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); - - if (strcmp(ifobject->nsname, "")) - switch_namespace(ifobject->ifdict_index); + if (!second_step) + thread_common_ops(ifobject, bufs); + + for (int i = 0; i < num_frames; i++) { + /*send EOT frame */ + if (i == (num_frames - 1)) + data.seqnum = -1; + else + data.seqnum = i; + gen_udp_hdr(&data, ifobject, udp_hdr); + gen_ip_hdr(ifobject, ip_hdr); + gen_udp_csum(udp_hdr, ip_hdr); + gen_eth_hdr(ifobject, eth_hdr); + gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE); } - if (ifobject->fv.vector == tx) { - int spinningrxctr = 0; - - if (!bidi_pass) - thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx); - - while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { - spinningrxctr++; - usleep(USLEEP_MAX); - } + print_verbose("Sending %d packets on interface %s\n", + (opt_pkt_count - 1), ifobject->ifname); + tx_only_all(ifobject); - ksft_print_msg("Interface [%s] vector [Tx]\n", ifobject->ifname); - for (int i = 0; i < num_frames; i++) { - /*send EOT frame */ - if (i == (num_frames - 1)) - data.seqnum = -1; - else - data.seqnum = i; - gen_udp_hdr(&data, ifobject, udp_hdr); - gen_ip_hdr(ifobject, ip_hdr); - gen_udp_csum(udp_hdr, ip_hdr); - gen_eth_hdr(ifobject, eth_hdr); - gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE); - } + testapp_cleanup_xsk_res(ifobject); + pthread_exit(NULL); +} - ksft_print_msg("Sending %d packets on interface %s\n", - (opt_pkt_count - 1), ifobject->ifname); - tx_only_all(ifobject); - } else if (ifobject->fv.vector == rx) { - struct pollfd fds[MAX_SOCKS] = { }; - int ret; +static void *worker_testapp_validate_rx(void *arg) +{ + struct ifobject *ifobject = (struct ifobject *)arg; + struct pollfd fds[MAX_SOCKS] = { }; + void *bufs = NULL; - if (!bidi_pass) - thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx); + if (!second_step) + thread_common_ops(ifobject, bufs); - ksft_print_msg("Interface [%s] vector [Rx]\n", ifobject->ifname); + if (stat_test_type != STAT_TEST_RX_FILL_EMPTY) xsk_populate_fill_ring(ifobject->umem); - TAILQ_INIT(&head); - if (debug_pkt_dump) { - pkt_buf = calloc(num_frames, sizeof(*pkt_buf)); - if (!pkt_buf) - exit_with_error(errno); - } + TAILQ_INIT(&head); + if (debug_pkt_dump) { + pkt_buf = calloc(num_frames, sizeof(*pkt_buf)); + if (!pkt_buf) + exit_with_error(errno); + } - fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); - fds[0].events = POLLIN; + fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); + fds[0].events = POLLIN; - pthread_mutex_lock(&sync_mutex); - pthread_cond_signal(&signal_rx_condition); - pthread_mutex_unlock(&sync_mutex); + pthread_barrier_wait(&barr); - while (1) { - if (opt_poll) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret <= 0) - continue; - } + while (1) { + if (test_type != TEST_TYPE_STATS) { rx_pkt(ifobject->xsk, fds); worker_pkt_validate(); - - if (sigvar) - break; + } else { + worker_stats_validate(ifobject); } + if (sigvar) + break; + } - ksft_print_msg("Received %d packets on interface %s\n", - pkt_counter, ifobject->ifname); + print_verbose("Received %d packets on interface %s\n", + pkt_counter, ifobject->ifname); - if (opt_teardown) - ksft_print_msg("Destroying socket\n"); - } + if (test_type == TEST_TYPE_TEARDOWN) + print_verbose("Destroying socket\n"); - if (!opt_bidi || bidi_pass) { - xsk_socket__delete(ifobject->xsk->xsk); - (void)xsk_umem__delete(ifobject->umem->umem); - } + testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } static void testapp_validate(void) { - struct timespec max_wait = { 0, 0 }; + bool bidi = test_type == TEST_TYPE_BIDI; + bool bpf = test_type == TEST_TYPE_BPF_RES; - pthread_attr_init(&attr); - pthread_attr_setstacksize(&attr, THREAD_STACK); - - if (opt_bidi && bidi_pass) { - pthread_init_mutex(); - if (!switching_notify) { - ksft_print_msg("Switching Tx/Rx vectors\n"); - switching_notify++; - } - } - - pthread_mutex_lock(&sync_mutex); + if (pthread_barrier_init(&barr, NULL, 2)) + exit_with_error(errno); /*Spawn RX thread */ - if (!opt_bidi || !bidi_pass) { - if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1])) - exit_with_error(errno); - } else if (opt_bidi && bidi_pass) { - /*switch Tx/Rx vectors */ - ifdict[0]->fv.vector = rx; - if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0])) - exit_with_error(errno); - } + pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx); - if (clock_gettime(CLOCK_REALTIME, &max_wait)) + pthread_barrier_wait(&barr); + if (pthread_barrier_destroy(&barr)) exit_with_error(errno); - max_wait.tv_sec += TMOUT_SEC; - - if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT) - exit_with_error(errno); - - pthread_mutex_unlock(&sync_mutex); /*Spawn TX thread */ - if (!opt_bidi || !bidi_pass) { - if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0])) - exit_with_error(errno); - } else if (opt_bidi && bidi_pass) { - /*switch Tx/Rx vectors */ - ifdict[1]->fv.vector = tx; - if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1])) - exit_with_error(errno); - } + pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx); pthread_join(t1, NULL); pthread_join(t0, NULL); - if (debug_pkt_dump) { + if (debug_pkt_dump && test_type != TEST_TYPE_STATS) { worker_pkt_dump(); for (int iter = 0; iter < num_frames - 1; iter++) { free(pkt_buf[iter]->payload); @@ -964,73 +916,217 @@ static void testapp_validate(void) free(pkt_buf); } - if (!opt_teardown && !opt_bidi) + if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS)) print_ksft_result(); } -static void testapp_sockets(void) +static void testapp_teardown(void) +{ + int i; + + for (i = 0; i < MAX_TEARDOWN_ITER; i++) { + pkt_counter = 0; + prev_pkt = -1; + sigvar = 0; + print_verbose("Creating socket\n"); + testapp_validate(); + } + + print_ksft_result(); +} + +static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2) { - for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) { + void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr; + enum fvector tmp_vector = ifobj1->fv.vector; + + ifobj1->func_ptr = ifobj2->func_ptr; + ifobj1->fv.vector = ifobj2->fv.vector; + + ifobj2->func_ptr = tmp_func_ptr; + ifobj2->fv.vector = tmp_vector; + + ifdict_tx = ifobj1; + ifdict_rx = ifobj2; +} + +static void testapp_bidi(void) +{ + for (int i = 0; i < MAX_BIDI_ITER; i++) { pkt_counter = 0; prev_pkt = -1; sigvar = 0; - ksft_print_msg("Creating socket\n"); + print_verbose("Creating socket\n"); testapp_validate(); - opt_bidi ? bidi_pass++ : bidi_pass; + if (!second_step) { + print_verbose("Switching Tx/Rx vectors\n"); + swap_vectors(ifdict[1], ifdict[0]); + } + second_step = true; } + swap_vectors(ifdict[0], ifdict[1]); + print_ksft_result(); } -static void init_iface_config(struct ifaceconfigobj *ifaceconfig) +static void swap_xsk_res(void) { - /*Init interface0 */ - ifdict[0]->fv.vector = tx; - memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN); - memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN); - ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr; - ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr; - ifdict[0]->dst_port = ifaceconfig->dst_port; - ifdict[0]->src_port = ifaceconfig->src_port; - - /*Init interface1 */ - ifdict[1]->fv.vector = rx; - memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN); - memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN); - ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr; - ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr; - ifdict[1]->dst_port = ifaceconfig->src_port; - ifdict[1]->src_port = ifaceconfig->dst_port; + xsk_socket__delete(ifdict_tx->xsk->xsk); + xsk_umem__delete(ifdict_tx->umem->umem); + xsk_socket__delete(ifdict_rx->xsk->xsk); + xsk_umem__delete(ifdict_rx->umem->umem); + ifdict_tx->umem = ifdict_tx->umem_arr[1]; + ifdict_tx->xsk = ifdict_tx->xsk_arr[1]; + ifdict_rx->umem = ifdict_rx->umem_arr[1]; + ifdict_rx->xsk = ifdict_rx->xsk_arr[1]; +} + +static void testapp_bpf_res(void) +{ + int i; + + for (i = 0; i < MAX_BPF_ITER; i++) { + pkt_counter = 0; + prev_pkt = -1; + sigvar = 0; + print_verbose("Creating socket\n"); + testapp_validate(); + if (!second_step) + swap_xsk_res(); + second_step = true; + } + + print_ksft_result(); +} + +static void testapp_stats(void) +{ + for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) { + stat_test_type = i; + + /* reset defaults */ + rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + + switch (stat_test_type) { + case STAT_TEST_RX_DROPPED: + frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE - + XDP_PACKET_HEADROOM - 1; + break; + case STAT_TEST_RX_FULL: + rxqsize = RX_FULL_RXQSIZE; + break; + default: + break; + } + testapp_validate(); + } + + print_ksft_result(); +} + +static void init_iface(struct ifobject *ifobj, const char *dst_mac, + const char *src_mac, const char *dst_ip, + const char *src_ip, const u16 dst_port, + const u16 src_port, enum fvector vector) +{ + struct in_addr ip; + + memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); + memcpy(ifobj->src_mac, src_mac, ETH_ALEN); + + inet_aton(dst_ip, &ip); + ifobj->dst_ip = ip.s_addr; + + inet_aton(src_ip, &ip); + ifobj->src_ip = ip.s_addr; + + ifobj->dst_port = dst_port; + ifobj->src_port = src_port; + + if (vector == tx) { + ifobj->fv.vector = tx; + ifobj->func_ptr = worker_testapp_validate_tx; + ifdict_tx = ifobj; + } else { + ifobj->fv.vector = rx; + ifobj->func_ptr = worker_testapp_validate_rx; + ifdict_rx = ifobj; + } +} + +static void run_pkt_test(int mode, int type) +{ + test_type = type; + + /* reset defaults after potential previous test */ + xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + pkt_counter = 0; + second_step = 0; + prev_pkt = -1; + sigvar = 0; + stat_test_type = -1; + rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + + configured_mode = mode; + + switch (mode) { + case (TEST_MODE_SKB): + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case (TEST_MODE_DRV): + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + break; + } + + switch (test_type) { + case TEST_TYPE_STATS: + testapp_stats(); + break; + case TEST_TYPE_TEARDOWN: + testapp_teardown(); + break; + case TEST_TYPE_BIDI: + testapp_bidi(); + break; + case TEST_TYPE_BPF_RES: + testapp_bpf_res(); + break; + default: + testapp_validate(); + break; + } } int main(int argc, char **argv) { struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; + bool failure = false; + int i, j; if (setrlimit(RLIMIT_MEMLOCK, &_rlim)) exit_with_error(errno); - const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; - const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; - const char *IP1 = "192.168.100.162"; - const char *IP2 = "192.168.100.161"; - u16 UDP_DST_PORT = 2020; - u16 UDP_SRC_PORT = 2121; - - ifaceconfig = malloc(sizeof(struct ifaceconfigobj)); - memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN); - memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN); - inet_aton(IP1, &ifaceconfig->dst_ip); - inet_aton(IP2, &ifaceconfig->src_ip); - ifaceconfig->dst_port = UDP_DST_PORT; - ifaceconfig->src_port = UDP_SRC_PORT; - for (int i = 0; i < MAX_INTERFACES; i++) { ifdict[i] = malloc(sizeof(struct ifobject)); if (!ifdict[i]) exit_with_error(errno); ifdict[i]->ifdict_index = i; + ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *)); + if (!ifdict[i]->xsk_arr) { + failure = true; + goto cleanup; + } + ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *)); + if (!ifdict[i]->umem_arr) { + failure = true; + goto cleanup; + } } setlocale(LC_ALL, ""); @@ -1039,25 +1135,27 @@ int main(int argc, char **argv) num_frames = ++opt_pkt_count; - init_iface_config(ifaceconfig); - - pthread_init_mutex(); + init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx); + init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx); - ksft_set_plan(1); + ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX); - if (!opt_teardown && !opt_bidi) { - testapp_validate(); - } else if (opt_teardown && opt_bidi) { - ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n"); - ksft_exit_xfail(); - } else { - testapp_sockets(); + for (i = 0; i < TEST_MODE_MAX; i++) { + for (j = 0; j < TEST_TYPE_MAX; j++) + run_pkt_test(i, j); } - for (int i = 0; i < MAX_INTERFACES; i++) +cleanup: + for (int i = 0; i < MAX_INTERFACES; i++) { + if (ifdict[i]->ns_fd != -1) + close(ifdict[i]->ns_fd); + free(ifdict[i]->xsk_arr); + free(ifdict[i]->umem_arr); free(ifdict[i]); + } - pthread_destroy_mutex(); + if (failure) + exit_with_error(errno); ksft_exit_pass(); diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 0e9f9b7e61c2..6c428b276ab6 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -23,6 +23,7 @@ #define MAX_SOCKS 1 #define MAX_TEARDOWN_ITER 10 #define MAX_BIDI_ITER 2 +#define MAX_BPF_ITER 2 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define MIN_PKT_SIZE 64 @@ -33,41 +34,63 @@ #define IP_PKT_TOS 0x9 #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) -#define TMOUT_SEC (3) #define EOT (-1) #define USLEEP_MAX 200000 -#define THREAD_STACK 60000000 #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 #define POLL_TMOUT 1000 -#define NEED_WAKEUP true +#define DEFAULT_PKT_CNT 10000 +#define RX_FULL_RXQSIZE 32 + +#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) typedef __u32 u32; typedef __u16 u16; typedef __u8 u8; -enum TESTS { - ORDER_CONTENT_VALIDATE_XDP_SKB = 0, - ORDER_CONTENT_VALIDATE_XDP_DRV = 1, +enum TEST_MODES { + TEST_MODE_UNCONFIGURED = -1, + TEST_MODE_SKB, + TEST_MODE_DRV, + TEST_MODE_MAX +}; + +enum TEST_TYPES { + TEST_TYPE_NOPOLL, + TEST_TYPE_POLL, + TEST_TYPE_TEARDOWN, + TEST_TYPE_BIDI, + TEST_TYPE_STATS, + TEST_TYPE_BPF_RES, + TEST_TYPE_MAX +}; + +enum STAT_TEST_TYPES { + STAT_TEST_RX_DROPPED, + STAT_TEST_TX_INVALID, + STAT_TEST_RX_FULL, + STAT_TEST_RX_FILL_EMPTY, + STAT_TEST_TYPE_MAX }; -u8 uut; -u8 debug_pkt_dump; -u32 num_frames; -u8 switching_notify; -u8 bidi_pass; +static int configured_mode = TEST_MODE_UNCONFIGURED; +static u8 debug_pkt_dump; +static u32 num_frames; +static bool second_step; +static int test_type; -static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static int opt_queue; static int opt_pkt_count; -static int opt_poll; -static int opt_teardown; -static int opt_bidi; -static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; +static u8 opt_verbose; + +static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY; static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; static u32 pkt_counter; -static u32 prev_pkt = -1; +static long prev_pkt = -1; static int sigvar; +static int stat_test_type; +static u32 rxqsize; +static u32 frame_headroom; struct xsk_umem_info { struct xsk_ring_prod fq; @@ -99,47 +122,32 @@ struct generic_data { u32 seqnum; }; -struct ifaceconfigobj { - u8 dst_mac[ETH_ALEN]; - u8 src_mac[ETH_ALEN]; - struct in_addr dst_ip; - struct in_addr src_ip; - u16 src_port; - u16 dst_port; -} *ifaceconfig; - struct ifobject { - int ifindex; - int ifdict_index; char ifname[MAX_INTERFACE_NAME_CHARS]; char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; - struct flow_vector fv; struct xsk_socket_info *xsk; + struct xsk_socket_info **xsk_arr; + struct xsk_umem_info **umem_arr; struct xsk_umem_info *umem; - u8 dst_mac[ETH_ALEN]; - u8 src_mac[ETH_ALEN]; + void *(*func_ptr)(void *arg); + struct flow_vector fv; + int ns_fd; + int ifdict_index; u32 dst_ip; u32 src_ip; u16 src_port; u16 dst_port; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; }; static struct ifobject *ifdict[MAX_INTERFACES]; +static struct ifobject *ifdict_rx; +static struct ifobject *ifdict_tx; /*threads*/ -atomic_int spinning_tx; -atomic_int spinning_rx; -pthread_mutex_t sync_mutex; -pthread_mutex_t sync_mutex_tx; -pthread_cond_t signal_rx_condition; -pthread_cond_t signal_tx_condition; -pthread_t t0, t1, ns_thread; -pthread_attr_t attr; - -struct targs { - bool retptr; - int idx; -}; +pthread_barrier_t barr; +pthread_t t0, t1; TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head); struct head_s *head_p; diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index 9d54c4645127..dac1c5f78752 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -82,24 +82,21 @@ clear_configs() { if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then [ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] && - { echo "removing link $1:$2"; ip netns exec $3 ip link del $2; } - echo "removing ns $3" + { ip netns exec $3 ip link del $2; } ip netns del $3 fi #Once we delete a veth pair node, the entire veth pair is removed, #this is just to be cautious just incase the NS does not exist then #veth node inside NS won't get removed so we explicitly remove it [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] && - { echo "removing link $1"; ip link del $1; } + { ip link del $1; } if [ -f ${SPECFILE} ]; then - echo "removing spec file:" ${SPECFILE} rm -f ${SPECFILE} fi } cleanup_exit() { - echo "cleaning up..." clear_configs $1 $2 $3 } @@ -108,28 +105,7 @@ validate_ip_utility() [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; } } -vethXDPgeneric() -{ - ip link set dev $1 xdpdrv off - ip netns exec $3 ip link set dev $2 xdpdrv off -} - -vethXDPnative() -{ - ip link set dev $1 xdpgeneric off - ip netns exec $3 ip link set dev $2 xdpgeneric off -} - execxdpxceiver() { - local -a 'paramkeys=("${!'"$1"'[@]}")' copy - paramkeysstr=${paramkeys[*]} - - for index in $paramkeysstr; - do - current=$1"[$index]" - copy[$index]=${!current} - done - - ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS} + ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG} } diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 0941aa16157e..22b31ebb3513 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -19,12 +19,12 @@ /* - * Memory cgroup charging and vmstat data aggregation is performed using - * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum - * discrepancy between charge and vmstat entries is number of cpus multiplied - * by 32 pages multiplied by 2. + * Memory cgroup charging is performed using percpu batches 32 pages + * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So + * the maximum discrepancy between charge and vmstat entries is number + * of cpus multiplied by 32 pages. */ -#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs()) +#define MAX_VMSTAT_ERROR (4096 * 32 * get_nprocs()) static int alloc_dcache(const char *cgroup, void *arg) @@ -162,7 +162,7 @@ static int cg_run_in_subcgroups(const char *parent, */ static int test_kmem_memcg_deletion(const char *root) { - long current, slab, anon, file, kernel_stack, sum; + long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum; int ret = KSFT_FAIL; char *parent; @@ -184,11 +184,14 @@ static int test_kmem_memcg_deletion(const char *root) anon = cg_read_key_long(parent, "memory.stat", "anon "); file = cg_read_key_long(parent, "memory.stat", "file "); kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack "); + pagetables = cg_read_key_long(parent, "memory.stat", "pagetables "); + percpu = cg_read_key_long(parent, "memory.stat", "percpu "); + sock = cg_read_key_long(parent, "memory.stat", "sock "); if (current < 0 || slab < 0 || anon < 0 || file < 0 || - kernel_stack < 0) + kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0) goto cleanup; - sum = slab + anon + file + kernel_stack; + sum = slab + anon + file + kernel_stack + pagetables + percpu + sock; if (abs(sum - current) < MAX_VMSTAT_ERROR) { ret = KSFT_PASS; } else { @@ -198,6 +201,9 @@ static int test_kmem_memcg_deletion(const char *root) printf("anon = %ld\n", anon); printf("file = %ld\n", file); printf("kernel_stack = %ld\n", kernel_stack); + printf("pagetables = %ld\n", pagetables); + printf("percpu = %ld\n", percpu); + printf("sock = %ld\n", sock); } cleanup: diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index fb23ce9617ea..485dff51bad2 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020 Hisilicon Limited. + * Copyright (C) 2020 HiSilicon Limited. */ #include <fcntl.h> @@ -40,7 +40,8 @@ struct map_benchmark { __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ - __u8 expansion[80]; /* For future use */ + __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ }; int main(int argc, char **argv) @@ -51,11 +52,13 @@ int main(int argc, char **argv) int threads = 1, seconds = 20, node = -1; /* default dma mask 32bit, bidirectional DMA */ int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; + /* default granule 1 PAGESIZE */ + int granule = 1; int cmd = DMA_MAP_BENCHMARK; char *p; - while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) { + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { switch (opt) { case 't': threads = atoi(optarg); @@ -75,6 +78,9 @@ int main(int argc, char **argv) case 'x': xdelay = atoi(optarg); break; + case 'g': + granule = atoi(optarg); + break; default: return -1; } @@ -110,6 +116,11 @@ int main(int argc, char **argv) exit(1); } + if (granule < 1 || granule > 1024) { + fprintf(stderr, "invalid granule size\n"); + exit(1); + } + fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR); if (fd == -1) { perror("open"); @@ -123,14 +134,15 @@ int main(int argc, char **argv) map.dma_bits = bits; map.dma_dir = dir; map.dma_trans_ns = xdelay; + map.granule = granule; if (ioctl(fd, cmd, &map)) { perror("ioctl"); exit(1); } - printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n", - threads, seconds, node, dir[directions]); + printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n", + threads, seconds, node, dir[directions], granule); printf("average map latency(us):%.1f standard deviation:%.1f\n", map.avg_map_100ns/10.0, map.map_stddev/10.0); printf("average unmap latency(us):%.1f standard deviation:%.1f\n", diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 1fedfc9da434..42d44e27802c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -446,6 +446,35 @@ __invalid_nexthop_test() log_test "Unresolved neigh: nexthop does not exist: $desc" } +__invalid_nexthop_bucket_test() +{ + local desc=$1; shift + local dip=$1; shift + local via_add=$1; shift + local trap_name="unresolved_neigh" + + RET=0 + + # Check that route to nexthop that does not exist triggers + # unresolved_neigh + ip nexthop add id 1 via $via_add dev $rp2 + ip nexthop add id 10 group 1 type resilient buckets 32 + ip route add $dip nhid 10 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + ping_do $h1 $dip + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + check_err 1 "Trap counter did not increase" + fi + + ip route del $dip nhid 10 + ip nexthop del id 10 + ip nexthop del id 1 + log_test "Unresolved neigh: nexthop bucket does not exist: $desc" +} + unresolved_neigh_test() { __host_miss_test "IPv4" 198.51.100.1 @@ -453,6 +482,8 @@ unresolved_neigh_test() __invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4 __invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \ 2001:db8:2::4 + __invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4 + __invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4 } vrf_without_routes_create() diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh index 6f3a70df63bc..e00435753008 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -120,12 +120,13 @@ __mirror_gre_test() sleep 5 for ((i = 0; i < count; ++i)); do + local sip=$(mirror_gre_ipv6_addr 1 $i)::1 local dip=$(mirror_gre_ipv6_addr 1 $i)::2 local htun=h3-gt6-$i local message icmp6_capture_install $htun - mirror_test v$h1 "" $dip $htun 100 10 + mirror_test v$h1 $sip $dip $htun 100 10 icmp6_capture_uninstall $htun done } diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh index f813ffefc07e..65f43a7ce9c9 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh @@ -55,10 +55,6 @@ port_test() | jq '.[][][] | select(.name=="physical_ports") |.["occ"]') [[ $occ -eq $max_ports ]] - if [[ $should_fail -eq 0 ]]; then - check_err $? "Mismatch ports number: Expected $max_ports, got $occ." - else - check_err_fail $should_fail $? "Reached more ports than expected" - fi + check_err_fail $should_fail $? "Attempt to create $max_ports ports (actual result $occ)" } diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index ed346da5d3cb..a217f9f6775b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -33,6 +33,7 @@ ALL_TESTS=" nexthop_obj_invalid_test nexthop_obj_offload_test nexthop_obj_group_offload_test + nexthop_obj_bucket_offload_test nexthop_obj_blackhole_offload_test nexthop_obj_route_offload_test devlink_reload_test @@ -739,11 +740,28 @@ nexthop_obj_invalid_test() ip nexthop add id 1 dev $swp1 ip nexthop add id 2 dev $swp1 + ip nexthop add id 3 via 192.0.2.3 dev $swp1 ip nexthop add id 10 group 1/2 check_fail $? "managed to configure a nexthop group with device-only nexthops when should not" + ip nexthop add id 10 group 3 type resilient buckets 7 + check_fail $? "managed to configure a too small resilient nexthop group when should not" + + ip nexthop add id 10 group 3 type resilient buckets 129 + check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not" + + ip nexthop add id 10 group 1/2 type resilient buckets 32 + check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not" + + ip nexthop add id 10 group 3 type resilient buckets 32 + check_err $? "failed to configure a valid resilient nexthop group" + ip nexthop replace id 3 dev $swp1 + check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not" + log_test "nexthop objects - invalid configurations" + ip nexthop del id 10 + ip nexthop del id 3 ip nexthop del id 2 ip nexthop del id 1 @@ -858,6 +876,70 @@ nexthop_obj_group_offload_test() simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 } +nexthop_obj_bucket_offload_test() +{ + # Test offload indication of nexthop buckets + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 + ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 1 + check_err $? "IPv4 nexthop buckets not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 2 + check_err $? "IPv6 nexthop buckets not marked as offloaded when should" + + # Invalidate nexthop id 1 + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" wait_for_trap \ + ip nexthop bucket show nhid 1 + check_err $? "IPv4 nexthop buckets not marked with trap when should" + + # Invalidate nexthop id 2 + ip neigh replace 2001:db8:1::2 nud failed dev $swp1 + busywait "$TIMEOUT" wait_for_trap \ + ip nexthop bucket show nhid 2 + check_err $? "IPv6 nexthop buckets not marked with trap when should" + + # Revalidate nexthop id 1 by changing its configuration + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 1 + check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop" + + # Revalidate nexthop id 2 by changing its neighbour + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 2 + check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour" + + log_test "nexthop bucket offload indication" + + ip neigh del 2001:db8:1::2 dev $swp1 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 10 + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + nexthop_obj_blackhole_offload_test() { # Test offload indication of blackhole nexthop objects diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index b0cb1aaffdda..33ddd01689be 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -507,8 +507,8 @@ do_red_test() check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." local diff=$((limit - backlog)) pct=$((100 * diff / limit)) - ((0 <= pct && pct <= 5)) - check_err $? "backlog $backlog / $limit expected <= 5% distance" + ((0 <= pct && pct <= 10)) + check_err $? "backlog $backlog / $limit expected <= 10% distance" log_test "TC $((vlan - 10)): RED backlog > limit" stop_traffic diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index 3f007c5f8361..f3ef3274f9b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -67,6 +67,13 @@ red_test() { install_qdisc + # Make sure that we get the non-zero value if there is any. + local cur=$(busywait 1100 until_counter_is "> 0" \ + qdisc_stats_get $swp3 10: .backlog) + (( cur == 0 )) + check_err $? "backlog of $cur observed on non-busy qdisc" + log_test "$QDISC backlog properly cleaned" + do_red_test 10 $BACKLOG1 do_red_test 11 $BACKLOG2 diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh deleted file mode 100755 index 0231205a7147..000000000000 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -lib_dir=$(dirname $0)/../../../../net/forwarding - -VXPORT=4789 - -ALL_TESTS=" - create_dot1d_and_dot1ad_vxlans -" -NUM_NETIFS=2 -source $lib_dir/lib.sh - -setup_prepare() -{ - swp1=${NETIFS[p1]} - swp2=${NETIFS[p2]} - - ip link set dev $swp1 up - ip link set dev $swp2 up -} - -cleanup() -{ - pre_cleanup - - ip link set dev $swp2 down - ip link set dev $swp1 down -} - -create_dot1d_and_dot1ad_vxlans() -{ - RET=0 - - ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ - vlan_default_pvid 0 mcast_snooping 0 - ip link set dev br0 up - - ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \ - "$VXPORT" nolearning noudpcsum tos inherit ttl 100 - ip link set dev vx100 up - - ip link set dev $swp1 master br0 - ip link set dev vx100 master br0 - bridge vlan add vid 100 dev vx100 pvid untagged - - ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0 - ip link set dev br1 up - - ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \ - "$VXPORT" nolearning noudpcsum tos inherit ttl 100 - ip link set dev vx200 up - - ip link set dev $swp2 master br1 - ip link set dev vx200 master br1 2>/dev/null - check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected" - - ip link set dev vx200 master br1 2>&1 >/dev/null \ - | grep -q mlxsw_spectrum - check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack" - - log_test "create 802.1d and 802.1ad VxLANs" - - ip link del dev vx200 - ip link del dev br1 - ip link del dev vx100 - ip link del dev br0 -} - -trap cleanup EXIT - -setup_prepare -setup_wait - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 4a1c9328555f..50654f8a8c37 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -30,6 +30,7 @@ trap cleanup EXIT ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do + RET_FIN=0 source ${current_test}_scale.sh num_netifs_var=${current_test^^}_NUM_NETIFS @@ -48,8 +49,9 @@ for current_test in ${TESTS:-$ALL_TESTS}; do else log_test "'$current_test' overflow $target" fi + RET_FIN=$(( RET_FIN || RET )) done done current_test="" -exit "$RET" +exit "$RET_FIN" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 087a884f66cd..685dfb3478b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -24,6 +24,7 @@ trap cleanup EXIT ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do + RET_FIN=0 source ${current_test}_scale.sh num_netifs_var=${current_test^^}_NUM_NETIFS @@ -50,8 +51,9 @@ for current_test in ${TESTS:-$ALL_TESTS}; do log_test "'$current_test' [$profile] overflow $target" fi done + RET_FIN=$(( RET_FIN || RET )) done done current_test="" -exit "$RET" +exit "$RET_FIN" diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh index cc0f07e72cf2..aa74be9f47c8 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh @@ -98,11 +98,7 @@ __tc_flower_test() jq -r '[ .[] | select(.kind == "flower") | .options | .in_hw ]' | jq .[] | wc -l) [[ $((offload_count - 1)) -eq $count ]] - if [[ $should_fail -eq 0 ]]; then - check_err $? "Offload mismatch" - else - check_err_fail $should_fail $? "Offload more than expacted" - fi + check_err_fail $should_fail $? "Attempt to offload $count rules (actual result $((offload_count - 1)))" } tc_flower_test() diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh index 553cb9fad508..5ec3beb637c8 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -11,6 +11,7 @@ ALL_TESTS=" matchall_mirror_behind_flower_ingress_test matchall_sample_behind_flower_ingress_test matchall_mirror_behind_flower_egress_test + matchall_proto_match_test police_limits_test multi_police_test " @@ -18,6 +19,7 @@ NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh switch_create() { @@ -166,7 +168,8 @@ matchall_sample_egress_test() RET=0 # It is forbidden in mlxsw driver to have matchall with sample action - # bound on egress + # bound on egress. Spectrum-1 specific restriction + [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return tc qdisc add dev $swp1 clsact @@ -289,6 +292,22 @@ matchall_mirror_behind_flower_egress_test() matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2" } +matchall_proto_match_test() +{ + RET=0 + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + matchall skip_sw \ + action sample group 1 rate 100 + check_fail $? "Incorrect success to add matchall rule with protocol match" + + tc qdisc del dev $swp1 clsact + + log_test "matchall protocol match" +} + police_limits_test() { RET=0 diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh new file mode 100755 index 000000000000..093bed088ad0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -0,0 +1,657 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that packets are sampled when tc-sample is used and that reported +# metadata is correct. Two sets of hosts (with and without LAG) are used, since +# metadata extraction in mlxsw is a bit different when LAG is involved. +# +# +---------------------------------+ +---------------------------------+ +# | H1 (vrf) | | H3 (vrf) | +# | + $h1 | | + $h3_lag | +# | | 192.0.2.1/28 | | | 192.0.2.17/28 | +# | | | | | | +# | | default via 192.0.2.2 | | | default via 192.0.2.18 | +# +----|----------------------------+ +----|----------------------------+ +# | | +# +----|-----------------------------------------|----------------------------+ +# | | 192.0.2.2/28 | 192.0.2.18/28 | +# | + $rp1 + $rp3_lag | +# | | +# | + $rp2 + $rp4_lag | +# | | 198.51.100.2/28 | 198.51.100.18/28 | +# +----|-----------------------------------------|----------------------------+ +# | | +# +----|----------------------------+ +----|----------------------------+ +# | | default via 198.51.100.2 | | | default via 198.51.100.18 | +# | | | | | | +# | | 198.51.100.1/28 | | | 198.51.100.17/28 | +# | + $h2 | | + $h4_lag | +# | H2 (vrf) | | H4 (vrf) | +# +---------------------------------+ +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + tc_sample_rate_test + tc_sample_max_rate_test + tc_sample_conflict_test + tc_sample_group_conflict_test + tc_sample_md_iif_test + tc_sample_md_lag_iif_test + tc_sample_md_oif_test + tc_sample_md_lag_oif_test + tc_sample_md_out_tc_test + tc_sample_md_out_tc_occ_test + tc_sample_md_latency_test + tc_sample_acl_group_conflict_test + tc_sample_acl_rate_test + tc_sample_acl_max_rate_test +" +NUM_NETIFS=8 +CAPTURE_FILE=$(mktemp) +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +# Available at https://github.com/Mellanox/libpsample +require_command psample + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/28 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 +} + +h2_destroy() +{ + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/28 +} + +h3_create() +{ + ip link set dev $h3 down + ip link add name ${h3}_bond type bond mode 802.3ad + ip link set dev $h3 master ${h3}_bond + + simple_if_init ${h3}_bond 192.0.2.17/28 + + ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18 +} + +h3_destroy() +{ + ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18 + + simple_if_fini ${h3}_bond 192.0.2.17/28 + + ip link set dev $h3 nomaster + ip link del dev ${h3}_bond +} + +h4_create() +{ + ip link set dev $h4 down + ip link add name ${h4}_bond type bond mode 802.3ad + ip link set dev $h4 master ${h4}_bond + + simple_if_init ${h4}_bond 198.51.100.17/28 + + ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18 +} + +h4_destroy() +{ + ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18 + + simple_if_fini ${h4}_bond 198.51.100.17/28 + + ip link set dev $h4 nomaster + ip link del dev ${h4}_bond +} + +router_create() +{ + ip link set dev $rp1 up + __addr_add_del $rp1 add 192.0.2.2/28 + tc qdisc add dev $rp1 clsact + + ip link set dev $rp2 up + __addr_add_del $rp2 add 198.51.100.2/28 + tc qdisc add dev $rp2 clsact + + ip link add name ${rp3}_bond type bond mode 802.3ad + ip link set dev $rp3 master ${rp3}_bond + __addr_add_del ${rp3}_bond add 192.0.2.18/28 + tc qdisc add dev $rp3 clsact + ip link set dev ${rp3}_bond up + + ip link add name ${rp4}_bond type bond mode 802.3ad + ip link set dev $rp4 master ${rp4}_bond + __addr_add_del ${rp4}_bond add 198.51.100.18/28 + tc qdisc add dev $rp4 clsact + ip link set dev ${rp4}_bond up +} + +router_destroy() +{ + ip link set dev ${rp4}_bond down + tc qdisc del dev $rp4 clsact + __addr_add_del ${rp4}_bond del 198.51.100.18/28 + ip link set dev $rp4 nomaster + ip link del dev ${rp4}_bond + + ip link set dev ${rp3}_bond down + tc qdisc del dev $rp3 clsact + __addr_add_del ${rp3}_bond del 192.0.2.18/28 + ip link set dev $rp3 nomaster + ip link del dev ${rp3}_bond + + tc qdisc del dev $rp2 clsact + __addr_add_del $rp2 del 198.51.100.2/28 + ip link set dev $rp2 down + + tc qdisc del dev $rp1 clsact + __addr_add_del $rp1 del 192.0.2.2/28 + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + h3=${NETIFS[p5]} + rp3=${NETIFS[p6]} + h4=${NETIFS[p7]} + rp4=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + h3_create + h4_create + router_create +} + +cleanup() +{ + pre_cleanup + + rm -f $CAPTURE_FILE + + router_destroy + h4_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +psample_capture_start() +{ + rm -f $CAPTURE_FILE + + psample &> $CAPTURE_FILE & + + sleep 1 +} + +psample_capture_stop() +{ + { kill %% && wait %%; } 2>/dev/null +} + +__tc_sample_rate_test() +{ + local desc=$1; shift + local dip=$1; shift + local pkts pct + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B $dip -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) + pct=$((100 * (pkts - 100) / 100)) + (( -25 <= pct && pct <= 25)) + check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + + log_test "tc sample rate ($desc)" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_rate_test() +{ + __tc_sample_rate_test "forward" 198.51.100.1 + __tc_sample_rate_test "local receive" 192.0.2.2 +} + +tc_sample_max_rate_test() +{ + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate $((35 * 10 ** 8)) group 1 + check_err $? "Failed to configure sampling rule with max rate" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate $((35 * 10 ** 8 + 1)) \ + group 1 &> /dev/null + check_fail $? "Managed to configure sampling rate above maximum" + + log_test "tc sample maximum rate" +} + +tc_sample_conflict_test() +{ + RET=0 + + # Test that two sampling rules cannot be configured on the same port, + # even when they share the same parameters. + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 1 &> /dev/null + check_fail $? "Managed to configure second sampling rule" + + # Delete the first rule and make sure the second rule can now be + # configured. + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule after deletion" + + log_test "tc sample conflict test" + + tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall +} + +tc_sample_group_conflict_test() +{ + RET=0 + + # Test that two sampling rules cannot be configured on the same port + # with different groups. + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 2 &> /dev/null + check_fail $? "Managed to configure sampling rule with conflicting group" + + log_test "tc sample group conflict test" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_iif_test() +{ + local rp1_ifindex + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]') + grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected in-ifindex" + + log_test "tc sample iif" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_lag_iif_test() +{ + local rp3_ifindex + + RET=0 + + tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \ + -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]') + grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected in-ifindex" + + log_test "tc sample lag iif" + + tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_oif_test() +{ + local rp2_ifindex + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]') + grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-ifindex" + + log_test "tc sample oif" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_lag_oif_test() +{ + local rp4_ifindex + + RET=0 + + tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \ + -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]') + grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-ifindex" + + log_test "tc sample lag oif" + + tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_out_tc_test() +{ + RET=0 + + # Output traffic class is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + # By default, all the packets should go to the same traffic class (0). + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "out-tc 0 " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-tc (0)" + + # Map all priorities to highest traffic class (7) and check reported + # out-tc. + tc qdisc replace dev $rp2 root handle 1: \ + prio bands 3 priomap 0 0 0 0 0 0 0 0 + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "out-tc 7 " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-tc (7)" + + log_test "tc sample out-tc" + + tc qdisc del dev $rp2 root handle 1: + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_out_tc_occ_test() +{ + local backlog pct occ + + RET=0 + + # Output traffic class occupancy is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + # Configure a shaper on egress to create congestion. + tc qdisc replace dev $rp2 root handle 1: \ + tbf rate 1Mbit burst 256k limit 1M + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q & + + # Allow congestion to reach steady state. + sleep 10 + + backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]') + + # Kill mausezahn. + { kill %% && wait %%; } 2>/dev/null + + psample_capture_stop + + # Record last congestion sample. + occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \ + cut -d ' ' -f 16) + + pct=$((100 * (occ - backlog) / backlog)) + (( -1 <= pct && pct <= 1)) + check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%" + + log_test "tc sample out-tc-occ" + + tc qdisc del dev $rp2 root handle 1: + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_latency_test() +{ + RET=0 + + # Egress sampling not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "latency " $CAPTURE_FILE + check_err $? "Sampled packets do not have latency attribute" + + log_test "tc sample latency" + + tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall +} + +tc_sample_acl_group_conflict_test() +{ + RET=0 + + # Test that two flower sampling rules cannot be configured on the same + # port with different groups. + + # Policy-based sampling is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule with same group" + + tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \ + skip_sw action sample rate 1024 group 2 &> /dev/null + check_fail $? "Managed to configure sampling rule with conflicting group" + + log_test "tc sample (w/ flower) group conflict test" + + tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower +} + +__tc_sample_acl_rate_test() +{ + local bind=$1; shift + local port=$1; shift + local pkts pct + + RET=0 + + # Policy-based sampling is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) + pct=$((100 * (pkts - 100) / 100)) + (( -25 <= pct && pct <= 25)) + check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + + # Setup a filter that should not match any packet and make sure packets + # are not sampled. + tc filter del dev $port $bind protocol ip pref 1 handle 101 flower + + tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "group 1 " $CAPTURE_FILE + check_fail $? "Sampled packets when should not" + + log_test "tc sample (w/ flower) rate ($bind)" + + tc filter del dev $port $bind protocol ip pref 1 handle 101 flower +} + +tc_sample_acl_rate_test() +{ + __tc_sample_acl_rate_test ingress $rp1 + __tc_sample_acl_rate_test egress $rp2 +} + +tc_sample_acl_max_rate_test() +{ + RET=0 + + # Policy-based sampling is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate $((2 ** 24 - 1)) group 1 + check_err $? "Failed to configure sampling rule with max rate" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate $((2 ** 24)) \ + group 1 &> /dev/null + check_fail $? "Managed to configure sampling rate above maximum" + + log_test "tc sample (w/ flower) maximum rate" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh index 9f64d5c7107b..7ca1f030d209 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -24,8 +24,11 @@ function check { local code=$1 local str=$2 local exp_str=$3 + local exp_fail=$4 - if [ $code -ne 0 ]; then + [ -z "$exp_fail" ] && cop="-ne" || cop="-eq" + + if [ $code $cop 0 ]; then ((num_errors++)) return fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh new file mode 100755 index 000000000000..0c56746e9ce0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) +[ a$ETHTOOL == a ] && ETHTOOL=ethtool + +set -o pipefail + +# netdevsim starts out with None/None +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "Configured FEC encodings: None +Active FEC encoding: None" + +# Test Auto +$ETHTOOL --set-fec $NSIM_NETDEV encoding auto +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "Configured FEC encodings: Auto +Active FEC encoding: Off" + +# Test case in-sensitivity +for o in off Off OFF; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? "$s" "Configured FEC encodings: Off +Active FEC encoding: Off" +done + +for o in BaseR baser BAser; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? "$s" "Configured FEC encodings: BaseR +Active FEC encoding: BaseR" +done + +for o in llrs rs; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? "$s" "Configured FEC encodings: ${o^^} +Active FEC encoding: ${o^^}" +done + +# Test mutliple bits +$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "Configured FEC encodings: RS LLRS +Active FEC encoding: LLRS" + +$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "Configured FEC encodings: Auto Off RS +Active FEC encoding: RS" + +# Make sure other link modes are rejected +$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null +check $? '' '' 1 + +$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null +check $? '' '' 1 + +# Try JSON +$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1 +if [ $? -eq 0 ]; then + $ETHTOOL --set-fec $NSIM_NETDEV encoding auto + check $? + + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]') + check $? "$s" '"Auto"' + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]') + check $? "$s" '"Off"' + + $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS + check $? + + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]') + check $? "$s" '"Auto" +"RS"' + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]') + check $? "$s" '"RS"' +fi + +# Test error injection +echo 11 > $NSIM_DEV_DFS/ethtool/get_err + +$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1 +check $? '' '' 1 + +echo 0 > $NSIM_DEV_DFS/ethtool/get_err +echo 11 > $NSIM_DEV_DFS/ethtool/set_err + +$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1 +check $? + +$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null +check $? '' '' 1 + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh index be0c1b5ee6b8..ba75c81cda91 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -11,14 +11,33 @@ ALL_TESTS=" nexthop_single_add_err_test nexthop_group_add_test nexthop_group_add_err_test + nexthop_res_group_add_test + nexthop_res_group_add_err_test nexthop_group_replace_test nexthop_group_replace_err_test + nexthop_res_group_replace_test + nexthop_res_group_replace_err_test + nexthop_res_group_idle_timer_test + nexthop_res_group_idle_timer_del_test + nexthop_res_group_increase_idle_timer_test + nexthop_res_group_decrease_idle_timer_test + nexthop_res_group_unbalanced_timer_test + nexthop_res_group_unbalanced_timer_del_test + nexthop_res_group_no_unbalanced_timer_test + nexthop_res_group_short_unbalanced_timer_test + nexthop_res_group_increase_unbalanced_timer_test + nexthop_res_group_decrease_unbalanced_timer_test + nexthop_res_group_force_migrate_busy_test nexthop_single_replace_test nexthop_single_replace_err_test nexthop_single_in_group_replace_test nexthop_single_in_group_replace_err_test + nexthop_single_in_res_group_replace_test + nexthop_single_in_res_group_replace_err_test nexthop_single_in_group_delete_test nexthop_single_in_group_delete_err_test + nexthop_single_in_res_group_delete_test + nexthop_single_in_res_group_delete_err_test nexthop_replay_test nexthop_replay_err_test " @@ -27,6 +46,7 @@ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh @@ -44,6 +64,28 @@ nexthop_check() return 0 } +nexthop_bucket_nhid_count_check() +{ + local group_id=$1; shift + local expected + local count + local nhid + local ret + + while (($# > 0)); do + nhid=$1; shift + expected=$1; shift + + count=$($IP nexthop bucket show id $group_id nhid $nhid | + grep "trap" | wc -l) + if ((expected != count)); then + return 1 + fi + done + + return 0 +} + nexthop_resource_check() { local expected_occ=$1; shift @@ -159,6 +201,71 @@ nexthop_group_add_err_test() nexthop_resource_set 9999 } +nexthop_res_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5 + nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 3 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 7 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Resilient nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Resilient nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + nexthop_group_replace_test() { RET=0 @@ -206,6 +313,411 @@ nexthop_group_replace_err_test() nexthop_resource_set 9999 } +nexthop_res_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 6 + + $IP nexthop replace id 10 group 1/2/3 type resilient + nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 3 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 9 + check_err $? "Wrong nexthop occupancy" + + log_test "Resilient nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_replace_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 6 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace + $IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_bucket_nhid_count_check 10 1 3 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 3 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 9 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Resilient nexthop group replace failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace +} + +nexthop_res_mark_buckets_busy() +{ + local group_id=$1; shift + local nhid=$1; shift + local count=$1; shift + local index + + for index in $($IP -j nexthop bucket show id $group_id nhid $nhid | + jq '.[].bucket.index' | head -n ${count:--0}) + do + echo $group_id $index \ + > $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity + done +} + +nexthop_res_num_nhid_buckets() +{ + local group_id=$1; shift + local nhid=$1; shift + + $IP -j nexthop bucket show id $group_id nhid $nhid | jq length +} + +nexthop_res_group_idle_timer_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "Group expected to be unbalanced" + + sleep 6 + + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after idle timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_idle_timer_del_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,50/2,50/3,1 \ + type resilient buckets 8 idle_timer 6 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient + + nexthop_bucket_nhid_count_check 10 1 4 2 4 3 0 + check_err $? "Group expected to be unbalanced" + + sleep 4 + + # Deletion prompts group replacement. Check that the bucket timers + # are kept. + $IP nexthop delete id 3 + + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "Group expected to still be unbalanced" + + sleep 4 + + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after idle timer (with delete)" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_increase_timer_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8 + sleep 4 + + # 6 seconds, past the original timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group still expected to be unbalanced" + + sleep 4 + + # 10 seconds, past the new timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer increase" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_decrease_timer_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 4 + sleep 4 + + # 6 seconds, past the new timer, before the old timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer decrease" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_increase_timer_del_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,100/2,100/3,1 \ + type resilient buckets 8 $timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8 + sleep 4 + + # 6 seconds, past the original timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group still expected to be unbalanced" + + sleep 4 + + # 10 seconds, past the new timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer increase" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_increase_idle_timer_test() +{ + __nexthop_res_group_increase_timer_test idle_timer +} + +nexthop_res_group_decrease_idle_timer_test() +{ + __nexthop_res_group_decrease_timer_test idle_timer +} + +nexthop_res_group_unbalanced_timer_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 6 unbalanced_timer 10 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + for i in 1 2; do + sleep 4 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "$i: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + done + + # 3 x sleep 4 > unbalanced timer 10 + sleep 4 + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_unbalanced_timer_del_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \ + buckets 8 idle_timer 6 unbalanced_timer 10 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient + + # Check that NH delete does not reset unbalanced time. + sleep 4 + $IP nexthop delete id 3 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "1: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + + sleep 4 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "2: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + + # 3 x sleep 4 > unbalanced timer 10 + sleep 4 + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced timer (with delete)" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_no_unbalanced_timer_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + for i in $(seq 3); do + sleep 60 + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "$i: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + done + + log_test "Buckets never force-migrated without unbalanced timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_short_unbalanced_timer_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 120 unbalanced_timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 5 + + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced < idle timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_increase_unbalanced_timer_test() +{ + __nexthop_res_group_increase_timer_test unbalanced_timer +} + +nexthop_res_group_decrease_unbalanced_timer_test() +{ + __nexthop_res_group_decrease_timer_test unbalanced_timer +} + +nexthop_res_group_force_migrate_busy_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 120 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + $IP nexthop replace id 10 group 2 type resilient + nexthop_bucket_nhid_count_check 10 2 8 + check_err $? "All buckets expected to have migrated" + + log_test "Busy buckets force-migrated when NH removed" + + $IP nexthop flush &> /dev/null +} + nexthop_single_replace_test() { RET=0 @@ -299,6 +811,63 @@ nexthop_single_in_group_replace_err_test() nexthop_resource_set 9999 } +nexthop_single_in_res_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in resilient group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_res_group_replace_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_bucket_nhid_count_check 10 1 2 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in resilient group failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace +} + nexthop_single_in_group_delete_test() { RET=0 @@ -346,6 +915,57 @@ nexthop_single_in_group_delete_err_test() nexthop_resource_set 9999 } +nexthop_single_in_res_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 2 4 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in resilient group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_res_group_delete_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 type resilient buckets 6 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace + $IP nexthop del id 1 + + # We failed to replace the two nexthop buckets that were originally + # assigned to nhid 1. + nexthop_bucket_nhid_count_check 10 2 2 3 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 8 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in resilient group failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace +} + nexthop_replay_test() { RET=0 diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh new file mode 100755 index 000000000000..ee10b1a8933c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh @@ -0,0 +1,181 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the psample module. It makes use of netdevsim +# which periodically generates "sampled" packets. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + psample_enable_test + psample_group_num_test + psample_md_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/ +CAPTURE_FILE=$(mktemp) +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +# Available at https://github.com/Mellanox/libpsample +require_command psample + +psample_capture() +{ + rm -f $CAPTURE_FILE + + timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE +} + +psample_enable_test() +{ + RET=0 + + echo 1 > $PSAMPLE_DIR/enable + check_err $? "Failed to enable sampling when should not" + + echo 1 > $PSAMPLE_DIR/enable 2>/dev/null + check_fail $? "Sampling enablement succeeded when should fail" + + psample_capture + if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then + check_err 1 "Failed to capture sampled packets" + fi + + echo 0 > $PSAMPLE_DIR/enable + check_err $? "Failed to disable sampling when should not" + + echo 0 > $PSAMPLE_DIR/enable 2>/dev/null + check_fail $? "Sampling disablement succeeded when should fail" + + psample_capture + if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then + check_err 1 "Captured sampled packets when should not" + fi + + log_test "psample enable / disable" +} + +psample_group_num_test() +{ + RET=0 + + echo 1234 > $PSAMPLE_DIR/group_num + echo 1 > $PSAMPLE_DIR/enable + + psample_capture + grep -q -e "group 1234" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong group number" + + # New group number should only be used after disable / enable. + echo 4321 > $PSAMPLE_DIR/group_num + + psample_capture + grep -q -e "group 4321" $CAPTURE_FILE + check_fail $? "Group number changed while sampling is active" + + echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable + + psample_capture + grep -q -e "group 4321" $CAPTURE_FILE + check_err $? "Group number did not change after restarting sampling" + + log_test "psample group number" + + echo 0 > $PSAMPLE_DIR/enable +} + +psample_md_test() +{ + RET=0 + + echo 1 > $PSAMPLE_DIR/enable + + echo 1234 > $PSAMPLE_DIR/in_ifindex + echo 4321 > $PSAMPLE_DIR/out_ifindex + psample_capture + + grep -q -e "in-ifindex 1234" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong in-ifindex" + + grep -q -e "out-ifindex 4321" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong out-ifindex" + + echo 5 > $PSAMPLE_DIR/out_tc + psample_capture + + grep -q -e "out-tc 5" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong out-tc" + + echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc + psample_capture + + grep -q -e "out-tc " $CAPTURE_FILE + check_fail $? "Sampled packets reported with out-tc when should not" + + echo 1 > $PSAMPLE_DIR/out_tc + echo 10000 > $PSAMPLE_DIR/out_tc_occ_max + psample_capture + + grep -q -e "out-tc-occ " $CAPTURE_FILE + check_err $? "Sampled packets not reported with out-tc-occ when should" + + echo 0 > $PSAMPLE_DIR/out_tc_occ_max + psample_capture + + grep -q -e "out-tc-occ " $CAPTURE_FILE + check_fail $? "Sampled packets reported with out-tc-occ when should not" + + echo 10000 > $PSAMPLE_DIR/latency_max + psample_capture + + grep -q -e "latency " $CAPTURE_FILE + check_err $? "Sampled packets not reported with latency when should" + + echo 0 > $PSAMPLE_DIR/latency_max + psample_capture + + grep -q -e "latency " $CAPTURE_FILE + check_fail $? "Sampled packets reported with latency when should not" + + log_test "psample metadata" + + echo 0 > $PSAMPLE_DIR/enable +} + +setup_prepare() +{ + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + set +e +} + +cleanup() +{ + pre_cleanup + rm -f $CAPTURE_FILE + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c index 5ebc1aec7923..0e393cb5f42d 100644 --- a/tools/testing/selftests/firmware/fw_namespace.c +++ b/tools/testing/selftests/firmware/fw_namespace.c @@ -95,7 +95,7 @@ static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_ } if (block_fw_in_parent_ns) umount("/lib/firmware"); - return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + return WEXITSTATUS(status) == EXIT_SUCCESS; } if (unshare(CLONE_NEWNS) != 0) { diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 32b87cc77c8e..bd83158e0e0b 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only /aarch64/get-reg-list /aarch64/get-reg-list-sve +/aarch64/vgic_init /s390x/memop /s390x/resets /s390x/sync_regs_test @@ -8,10 +9,13 @@ /x86_64/debug_regs /x86_64/evmcs_test /x86_64/get_cpuid_test +/x86_64/get_msr_index_features /x86_64/kvm_pv_test +/x86_64/hyperv_clock /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test +/x86_64/set_boot_cpu_id /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test @@ -35,6 +39,7 @@ /dirty_log_perf_test /hardware_disable_test /kvm_create_max_vcpus +/kvm_page_table_test /memslot_modification_stress_test /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a6d61f451f88..e439d027939d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -include ../../../../scripts/Kbuild.include +include ../../../build/Build.include all: @@ -39,12 +39,15 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test @@ -69,16 +72,19 @@ TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus +TEST_GEN_PROGS_x86_64 += kvm_page_table_test TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve +TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus +TEST_GEN_PROGS_aarch64 += kvm_page_table_test TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time @@ -88,6 +94,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus +TEST_GEN_PROGS_s390x += kvm_page_table_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c new file mode 100644 index 000000000000..623f31a14326 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic init sequence tests + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#define _GNU_SOURCE +#include <linux/kernel.h> +#include <sys/syscall.h> +#include <asm/kvm.h> +#include <asm/kvm_para.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define NR_VCPUS 4 + +#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) (((uint64_t)(count) << 52) | \ + ((uint64_t)((base) >> 16) << 16) | ((uint64_t)(flags) << 12) | index) +#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) + +#define GICR_TYPER 0x8 + +struct vm_gic { + struct kvm_vm *vm; + int gic_fd; +}; + +static int max_ipa_bits; + +/* helper to access a redistributor register */ +static int access_redist_reg(int gicv3_fd, int vcpu, int offset, + uint32_t *val, bool write) +{ + uint64_t attr = REG_OFFSET(vcpu, offset); + + return _kvm_device_access(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + attr, val, write); +} + +/* dummy guest code */ +static void guest_code(void) +{ + GUEST_SYNC(0); + GUEST_SYNC(1); + GUEST_SYNC(2); + GUEST_DONE(); +} + +/* we don't want to assert on run execution, hence that helper */ +static int run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + ucall_init(vm, NULL); + int ret = _vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL); + if (ret) + return -errno; + return 0; +} + +static struct vm_gic vm_gic_create(void) +{ + struct vm_gic v; + + v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL); + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + return v; +} + +static void vm_gic_destroy(struct vm_gic *v) +{ + close(v->gic_fd); + kvm_vm_free(v->vm); +} + +/** + * Helper routine that performs KVM device tests in general and + * especially ARM_VGIC_V3 ones. Eventually the ARM_VGIC_V3 + * device gets created, a legacy RDIST region is set at @0x0 + * and a DIST region is set @0x60000 + */ +static void subtest_dist_rdist(struct vm_gic *v) +{ + int ret; + uint64_t addr; + + /* Check existing group/attributes */ + kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST); + + kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST); + + /* check non existing attribute */ + ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, 0); + TEST_ASSERT(ret && errno == ENXIO, "attribute not supported"); + + /* misaligned DIST and REDIST address settings */ + addr = 0x1000; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "GICv3 dist base not 64kB aligned"); + + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "GICv3 redist base not 64kB aligned"); + + /* out of range address */ + if (max_ipa_bits) { + addr = 1ULL << max_ipa_bits; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit"); + + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit"); + } + + /* set REDIST base address @0x0*/ + addr = 0x00000; + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + + /* Attempt to create a second legacy redistributor region */ + addr = 0xE0000; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == EEXIST, "GICv3 redist base set again"); + + /* Attempt to mix legacy and new redistributor regions */ + addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "attempt to mix GICv3 REDIST and REDIST_REGION"); + + /* + * Set overlapping DIST / REDIST, cannot be detected here. Will be detected + * on first vcpu run instead. + */ + addr = 3 * 2 * 0x10000; + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, + &addr, true); +} + +/* Test the new REDIST region API */ +static void subtest_redist_regions(struct vm_gic *v) +{ + uint64_t addr, expected_addr; + int ret; + + ret = kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST); + TEST_ASSERT(!ret, "Multiple redist regions advertised"); + + addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, + "attempt to register the first rdist region with index != 0"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address"); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, + "register an rdist region overlapping with another one"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(1, 1ULL << max_ipa_bits, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == E2BIG, + "register redist region with base address beyond IPA range"); + + addr = 0x260000; + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, + "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION"); + + /* + * Now there are 2 redist regions: + * region 0 @ 0x200000 2 redists + * region 1 @ 0x240000 1 redist + * Attempt to read their characteristics + */ + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0); + expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1); + expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1"); + + addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region"); + + addr = 0x260000; + kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2); + ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist"); +} + +/* + * VGIC KVM device is created and initialized before the secondary CPUs + * get created + */ +static void test_vgic_then_vcpus(void) +{ + struct vm_gic v; + int ret, i; + + v.vm = vm_create_default(0, 0, guest_code); + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + subtest_dist_rdist(&v); + + /* Add the rest of the VCPUs */ + for (i = 1; i < NR_VCPUS; ++i) + vm_vcpu_add_default(v.vm, i, guest_code); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); + + vm_gic_destroy(&v); +} + +/* All the VCPUs are created before the VGIC KVM device gets initialized */ +static void test_vcpus_then_vgic(void) +{ + struct vm_gic v; + int ret; + + v = vm_gic_create(); + + subtest_dist_rdist(&v); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); + + vm_gic_destroy(&v); +} + +static void test_new_redist_regions(void) +{ + void *dummy = NULL; + struct vm_gic v; + uint64_t addr; + int ret; + + v = vm_gic_create(); + subtest_redist_regions(&v); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists"); + vm_gic_destroy(&v); + + /* step2 */ + + v = vm_gic_create(); + subtest_redist_regions(&v); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init"); + + vm_gic_destroy(&v); + + /* step 3 */ + + v = vm_gic_create(); + subtest_redist_regions(&v); + + _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy, true); + TEST_ASSERT(ret && errno == EFAULT, + "register a third region allowing to cover the 4 vcpus"); + + addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + ret = run_vcpu(v.vm, 3); + TEST_ASSERT(!ret, "vcpu run"); + + vm_gic_destroy(&v); +} + +static void test_typer_accesses(void) +{ + struct vm_gic v; + uint64_t addr; + uint32_t val; + int ret, i; + + v.vm = vm_create_default(0, 0, guest_code); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + vm_vcpu_add_default(v.vm, 3, guest_code); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(ret && errno == EINVAL, "attempting to read GICR_TYPER of non created vcpu"); + + vm_vcpu_add_default(v.vm, 1, guest_code); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(ret && errno == EBUSY, "read GICR_TYPER before GIC initialized"); + + vm_vcpu_add_default(v.vm, 2, guest_code); + + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + for (i = 0; i < NR_VCPUS ; i++) { + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && !val, "read GICR_TYPER before rdist region setting"); + } + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + /* The 2 first rdists should be put there (vcpu 0 and 3) */ + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && !val, "read typer of rdist #0"); + + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #1"); + + addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1); + ret = _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region"); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x100, + "no redist region attached to vcpu #1 yet, last cannot be returned"); + + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x200, + "no redist region attached to vcpu #2, last cannot be returned"); + + addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1"); + + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x210, + "read typer of rdist #1, last properly returned"); + + vm_gic_destroy(&v); +} + +/** + * Test GICR_TYPER last bit with new redist regions + * rdist regions #1 and #2 are contiguous + * rdist region #0 @0x100000 2 rdist capacity + * rdists: 0, 3 (Last) + * rdist region #1 @0x240000 2 rdist capacity + * rdists: 5, 4 (Last) + * rdist region #2 @0x200000 2 rdist capacity + * rdists: 1, 2 + */ +static void test_last_bit_redist_regions(void) +{ + uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + struct vm_gic v; + uint64_t addr; + uint32_t val; + int ret; + + v.vm = vm_create_default_with_vcpus(6, 0, 0, guest_code, vcpuids); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2); + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0"); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1"); + + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x200, "read typer of rdist #2"); + + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #3"); + + ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #5"); + + ret = access_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x410, "read typer of rdist #4"); + + vm_gic_destroy(&v); +} + +/* Test last bit with legacy region */ +static void test_last_bit_single_rdist(void) +{ + uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + struct vm_gic v; + uint64_t addr; + uint32_t val; + int ret; + + v.vm = vm_create_default_with_vcpus(6, 0, 0, guest_code, vcpuids); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + addr = 0x10000; + kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + + ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0"); + + ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x300, "read typer of rdist #1"); + + ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #2"); + + ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #3"); + + ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); + TEST_ASSERT(!ret && val == 0x210, "read typer of rdist #3"); + + vm_gic_destroy(&v); +} + +void test_kvm_device(void) +{ + struct vm_gic v; + int ret, fd; + + v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL); + + /* try to create a non existing KVM device */ + ret = _kvm_create_device(v.vm, 0, true, &fd); + TEST_ASSERT(ret && errno == ENODEV, "unsupported device"); + + /* trial mode with VGIC_V3 device */ + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true, &fd); + if (ret) { + print_skip("GICv3 not supported"); + exit(KSFT_SKIP); + } + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false, &fd); + TEST_ASSERT(ret && errno == EEXIST, "create GICv3 device twice"); + + kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true); + + if (!_kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, true, &fd)) { + ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, false, &fd); + TEST_ASSERT(ret && errno == EINVAL, "create GICv2 while v3 exists"); + } + + vm_gic_destroy(&v); +} + +int main(int ac, char **av) +{ + max_ipa_bits = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + test_kvm_device(); + test_vcpus_then_vgic(); + test_vgic_then_vcpus(); + test_new_redist_regions(); + test_typer_accesses(); + test_last_bit_redist_regions(); + test_last_bit_single_rdist(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index bb2752d78fe3..81edbd23d371 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -17,6 +17,7 @@ #include <linux/bitmap.h> #include <linux/bitops.h> #include <asm/barrier.h> +#include <linux/atomic.h> #include "kvm_util.h" #include "test_util.h" @@ -137,12 +138,20 @@ static uint64_t host_clear_count; static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ -static sem_t dirty_ring_vcpu_stop; -static sem_t dirty_ring_vcpu_cont; +static sem_t sem_vcpu_stop; +static sem_t sem_vcpu_cont; +/* + * This is only set by main thread, and only cleared by vcpu thread. It is + * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC + * is the only place that we'll guarantee both "dirty bit" and "dirty data" + * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted + * after setting dirty bit but before the data is written. + */ +static atomic_t vcpu_sync_stop_requested; /* * This is updated by the vcpu thread to tell the host whether it's a * ring-full event. It should only be read until a sem_wait() of - * dirty_ring_vcpu_stop and before vcpu continues to run. + * sem_vcpu_stop and before vcpu continues to run. */ static bool dirty_ring_vcpu_ring_full; /* @@ -234,6 +243,17 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); } +/* Should only be called after a GUEST_SYNC */ +static void vcpu_handle_sync_stop(void) +{ + if (atomic_read(&vcpu_sync_stop_requested)) { + /* It means main thread is sleeping waiting */ + atomic_set(&vcpu_sync_stop_requested, false); + sem_post(&sem_vcpu_stop); + sem_wait_until(&sem_vcpu_cont); + } +} + static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) { struct kvm_run *run = vcpu_state(vm, VCPU_ID); @@ -244,6 +264,8 @@ static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); + + vcpu_handle_sync_stop(); } static bool dirty_ring_supported(void) @@ -301,13 +323,13 @@ static void dirty_ring_wait_vcpu(void) { /* This makes sure that hardware PML cache flushed */ vcpu_kick(); - sem_wait_until(&dirty_ring_vcpu_stop); + sem_wait_until(&sem_vcpu_stop); } static void dirty_ring_continue_vcpu(void) { pr_info("Notifying vcpu to continue\n"); - sem_post(&dirty_ring_vcpu_cont); + sem_post(&sem_vcpu_cont); } static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, @@ -361,11 +383,11 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) /* Update the flag first before pause */ WRITE_ONCE(dirty_ring_vcpu_ring_full, run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); - sem_post(&dirty_ring_vcpu_stop); + sem_post(&sem_vcpu_stop); pr_info("vcpu stops because %s...\n", dirty_ring_vcpu_ring_full ? "dirty ring is full" : "vcpu is kicked out"); - sem_wait_until(&dirty_ring_vcpu_cont); + sem_wait_until(&sem_vcpu_cont); pr_info("vcpu continues now.\n"); } else { TEST_ASSERT(false, "Invalid guest sync status: " @@ -377,7 +399,7 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) static void dirty_ring_before_vcpu_join(void) { /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&dirty_ring_vcpu_cont); + sem_post(&sem_vcpu_cont); } struct log_mode { @@ -505,9 +527,8 @@ static void *vcpu_worker(void *data) */ sigmask->len = 8; pthread_sigmask(0, NULL, sigset); + sigdelset(sigset, SIG_IPI); vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask); - sigaddset(sigset, SIG_IPI); - pthread_sigmask(SIG_BLOCK, sigset, NULL); sigemptyset(sigset); sigaddset(sigset, SIG_IPI); @@ -768,7 +789,25 @@ static void run_test(enum vm_guest_mode mode, void *arg) usleep(p->interval * 1000); log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, bmap, host_num_pages); + + /* + * See vcpu_sync_stop_requested definition for details on why + * we need to stop vcpu when verify data. + */ + atomic_set(&vcpu_sync_stop_requested, true); + sem_wait_until(&sem_vcpu_stop); + /* + * NOTE: for dirty ring, it's possible that we didn't stop at + * GUEST_SYNC but instead we stopped because ring is full; + * that's okay too because ring full means we're only missing + * the flush of the last page, and since we handle the last + * page specially verification will succeed anyway. + */ + assert(host_log_mode == LOG_MODE_DIRTY_RING || + atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); + sem_post(&sem_vcpu_cont); + iteration++; sync_global_to_guest(vm, iteration); } @@ -818,9 +857,10 @@ int main(int argc, char *argv[]) .interval = TEST_HOST_LOOP_INTERVAL, }; int opt, i; + sigset_t sigset; - sem_init(&dirty_ring_vcpu_stop, 0, 0); - sem_init(&dirty_ring_vcpu_cont, 0, 0); + sem_init(&sem_vcpu_stop, 0, 0); + sem_init(&sem_vcpu_cont, 0, 0); guest_modes_append_default(); @@ -876,6 +916,11 @@ int main(int argc, char *argv[]) srandom(time(0)); + /* Ensure that vCPU threads start with SIG_IPI blocked. */ + sigemptyset(&sigset); + sigaddset(&sigset, SIG_IPI); + pthread_sigmask(SIG_BLOCK, &sigset, NULL); + if (host_log_mode_option == LOG_MODE_ALL) { /* Run each log mode */ for (i = 0; i < LOG_MODE_NUM; i++) { diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index 2f2eeb8a1d86..5aadf84c91c0 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -108,7 +108,7 @@ static void run_test(uint32_t run) kvm_vm_elf_load(vm, program_invocation_name, 0, 0); vm_create_irqchip(vm); - fprintf(stderr, "%s: [%d] start vcpus\n", __func__, run); + pr_debug("%s: [%d] start vcpus\n", __func__, run); for (i = 0; i < VCPU_NUM; ++i) { vm_vcpu_add_default(vm, i, guest_code); payloads[i].vm = vm; @@ -124,7 +124,7 @@ static void run_test(uint32_t run) check_set_affinity(throw_away, &cpu_set); } } - fprintf(stderr, "%s: [%d] all threads launched\n", __func__, run); + pr_debug("%s: [%d] all threads launched\n", __func__, run); sem_post(sem); for (i = 0; i < VCPU_NUM; ++i) check_join(threads[i], &b); @@ -147,16 +147,16 @@ int main(int argc, char **argv) if (pid == 0) run_test(i); /* This function always exits */ - fprintf(stderr, "%s: [%d] waiting semaphore\n", __func__, i); + pr_debug("%s: [%d] waiting semaphore\n", __func__, i); sem_wait(sem); r = (rand() % DELAY_US_MAX) + 1; - fprintf(stderr, "%s: [%d] waiting %dus\n", __func__, i, r); + pr_debug("%s: [%d] waiting %dus\n", __func__, i, r); usleep(r); r = waitpid(pid, &s, WNOHANG); TEST_ASSERT(r != pid, "%s: [%d] child exited unexpectedly status: [%d]", __func__, i, s); - fprintf(stderr, "%s: [%d] killing child\n", __func__, i); + pr_debug("%s: [%d] killing child\n", __func__, i); kill(pid, SIGKILL); } diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 2d7eb6989e83..a8f022794ce3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,6 +16,7 @@ #include "sparsebit.h" +#define KVM_DEV_PATH "/dev/kvm" #define KVM_MAX_VCPUS 512 /* @@ -68,9 +69,6 @@ enum vm_guest_mode { #define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) #define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) -#define vm_guest_mode_string(m) vm_guest_mode_string[m] -extern const char * const vm_guest_mode_string[]; - struct vm_guest_mode_params { unsigned int pa_bits; unsigned int va_bits; @@ -84,6 +82,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_enable_cap *cap); void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); +const char *vm_guest_mode_string(uint32_t i); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); @@ -133,6 +132,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); @@ -223,6 +223,15 @@ int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, #endif void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid); +int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); +int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); +int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd); +int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test); +int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write); +int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write); + const char *exit_reason_str(unsigned int exit_reason); void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index b7f41399f22c..fade3130eb01 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -71,13 +71,32 @@ enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, VM_MEM_SRC_ANONYMOUS_HUGETLB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB, + VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB, + NUM_SRC_TYPES, }; struct vm_mem_backing_src_alias { const char *name; - enum vm_mem_backing_src_type type; + uint32_t flag; }; +bool thp_configured(void); +size_t get_trans_hugepagesz(void); +size_t get_def_hugetlb_pagesz(void); +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); +size_t get_backing_src_pagesz(uint32_t i); void backing_src_help(void); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c new file mode 100644 index 000000000000..1c4753fff19e --- /dev/null +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM page table test + * + * Copyright (C) 2021, Huawei, Inc. + * + * Make sure that THP has been enabled or enough HUGETLB pages with specific + * page size have been pre-allocated on your system, if you are planning to + * use hugepages to back the guest memory for testing. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <pthread.h> +#include <semaphore.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "guest_modes.h" + +#define TEST_MEM_SLOT_INDEX 1 + +/* Default size(1GB) of the memory for testing */ +#define DEFAULT_TEST_MEM_SIZE (1 << 30) + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +/* Different guest memory accessing stages */ +enum test_stage { + KVM_BEFORE_MAPPINGS, + KVM_CREATE_MAPPINGS, + KVM_UPDATE_MAPPINGS, + KVM_ADJUST_MAPPINGS, + NUM_TEST_STAGES, +}; + +static const char * const test_stage_string[] = { + "KVM_BEFORE_MAPPINGS", + "KVM_CREATE_MAPPINGS", + "KVM_UPDATE_MAPPINGS", + "KVM_ADJUST_MAPPINGS", +}; + +struct vcpu_args { + int vcpu_id; + bool vcpu_write; +}; + +struct test_args { + struct kvm_vm *vm; + uint64_t guest_test_virt_mem; + uint64_t host_page_size; + uint64_t host_num_pages; + uint64_t large_page_size; + uint64_t large_num_pages; + uint64_t host_pages_per_lpage; + enum vm_mem_backing_src_type src_type; + struct vcpu_args vcpu_args[KVM_MAX_VCPUS]; +}; + +/* + * Guest variables. Use addr_gva2hva() if these variables need + * to be changed in host. + */ +static enum test_stage guest_test_stage; + +/* Host variables */ +static uint32_t nr_vcpus = 1; +static struct test_args test_args; +static enum test_stage *current_stage; +static bool host_quit; + +/* Whether the test stage is updated, or completed */ +static sem_t test_stage_updated; +static sem_t test_stage_completed; + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +static void guest_code(int vcpu_id) +{ + struct test_args *p = &test_args; + struct vcpu_args *vcpu_args = &p->vcpu_args[vcpu_id]; + enum test_stage *current_stage = &guest_test_stage; + uint64_t addr; + int i, j; + + /* Make sure vCPU args data structure is not corrupt */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + while (true) { + addr = p->guest_test_virt_mem; + + switch (READ_ONCE(*current_stage)) { + /* + * All vCPU threads will be started in this stage, + * where guest code of each vCPU will do nothing. + */ + case KVM_BEFORE_MAPPINGS: + break; + + /* + * Before dirty logging, vCPUs concurrently access the first + * 8 bytes of each page (host page/large page) within the same + * memory region with different accessing types (read/write). + * Then KVM will create normal page mappings or huge block + * mappings for them. + */ + case KVM_CREATE_MAPPINGS: + for (i = 0; i < p->large_num_pages; i++) { + if (vcpu_args->vcpu_write) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + + addr += p->large_page_size; + } + break; + + /* + * During dirty logging, KVM will only update attributes of the + * normal page mappings from RO to RW if memory backing src type + * is anonymous. In other cases, KVM will split the huge block + * mappings into normal page mappings if memory backing src type + * is THP or HUGETLB. + */ + case KVM_UPDATE_MAPPINGS: + if (p->src_type == VM_MEM_SRC_ANONYMOUS) { + for (i = 0; i < p->host_num_pages; i++) { + *(uint64_t *)addr = 0x0123456789ABCDEF; + addr += p->host_page_size; + } + break; + } + + for (i = 0; i < p->large_num_pages; i++) { + /* + * Write to the first host page in each large + * page region, and triger break of large pages. + */ + *(uint64_t *)addr = 0x0123456789ABCDEF; + + /* + * Access the middle host pages in each large + * page region. Since dirty logging is enabled, + * this will create new mappings at the smallest + * granularity. + */ + addr += p->large_page_size / 2; + for (j = 0; j < p->host_pages_per_lpage / 2; j++) { + READ_ONCE(*(uint64_t *)addr); + addr += p->host_page_size; + } + } + break; + + /* + * After dirty logging is stopped, vCPUs concurrently read + * from every single host page. Then KVM will coalesce the + * split page mappings back to block mappings. And a TLB + * conflict abort could occur here if TLB entries of the + * page mappings are not fully invalidated. + */ + case KVM_ADJUST_MAPPINGS: + for (i = 0; i < p->host_num_pages; i++) { + READ_ONCE(*(uint64_t *)addr); + addr += p->host_page_size; + } + break; + + default: + GUEST_ASSERT(0); + } + + GUEST_SYNC(1); + } +} + +static void *vcpu_worker(void *data) +{ + int ret; + struct vcpu_args *vcpu_args = data; + struct kvm_vm *vm = test_args.vm; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_run *run; + struct timespec start; + struct timespec ts_diff; + enum test_stage stage; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + while (!READ_ONCE(host_quit)) { + ret = sem_wait(&test_stage_updated); + TEST_ASSERT(ret == 0, "Error in sem_wait"); + + if (READ_ONCE(host_quit)) + return NULL; + + clock_gettime(CLOCK_MONOTONIC_RAW, &start); + ret = _vcpu_run(vm, vcpu_id); + ts_diff = timespec_elapsed(start); + + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + + pr_debug("Got sync event from vCPU %d\n", vcpu_id); + stage = READ_ONCE(*current_stage); + + /* + * Here we can know the execution time of every + * single vcpu running in different test stages. + */ + pr_debug("vCPU %d has completed stage %s\n" + "execution time is: %ld.%.9lds\n\n", + vcpu_id, test_stage_string[stage], + ts_diff.tv_sec, ts_diff.tv_nsec); + + ret = sem_post(&test_stage_completed); + TEST_ASSERT(ret == 0, "Error in sem_post"); + } + + return NULL; +} + +struct test_params { + uint64_t phys_offset; + uint64_t test_mem_size; + enum vm_mem_backing_src_type src_type; +}; + +static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) +{ + int ret; + struct test_params *p = arg; + struct vcpu_args *vcpu_args; + enum vm_mem_backing_src_type src_type = p->src_type; + uint64_t large_page_size = get_backing_src_pagesz(src_type); + uint64_t guest_page_size = vm_guest_mode_params[mode].page_size; + uint64_t host_page_size = getpagesize(); + uint64_t test_mem_size = p->test_mem_size; + uint64_t guest_num_pages; + uint64_t alignment; + void *host_test_mem; + struct kvm_vm *vm; + int vcpu_id; + + /* Align up the test memory size */ + alignment = max(large_page_size, guest_page_size); + test_mem_size = (test_mem_size + alignment - 1) & ~(alignment - 1); + + /* Create a VM with enough guest pages */ + guest_num_pages = test_mem_size / guest_page_size; + vm = vm_create_with_vcpus(mode, nr_vcpus, + guest_num_pages, 0, guest_code, NULL); + + /* Align down GPA of the testing memslot */ + if (!p->phys_offset) + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + guest_page_size; + else + guest_test_phys_mem = p->phys_offset; +#ifdef __s390x__ + alignment = max(0x100000, alignment); +#endif + guest_test_phys_mem &= ~(alignment - 1); + + /* Set up the shared data structure test_args */ + test_args.vm = vm; + test_args.guest_test_virt_mem = guest_test_virt_mem; + test_args.host_page_size = host_page_size; + test_args.host_num_pages = test_mem_size / host_page_size; + test_args.large_page_size = large_page_size; + test_args.large_num_pages = test_mem_size / large_page_size; + test_args.host_pages_per_lpage = large_page_size / host_page_size; + test_args.src_type = src_type; + + for (vcpu_id = 0; vcpu_id < KVM_MAX_VCPUS; vcpu_id++) { + vcpu_args = &test_args.vcpu_args[vcpu_id]; + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->vcpu_write = !(vcpu_id % 2); + } + + /* Add an extra memory slot with specified backing src type */ + vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, guest_num_pages, 0); + + /* Do mapping(GVA->GPA) for the testing memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + /* Cache the HVA pointer of the region */ + host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + + /* Export shared structure test_args to guest */ + ucall_init(vm, NULL); + sync_global_to_guest(vm, test_args); + + ret = sem_init(&test_stage_updated, 0, 0); + TEST_ASSERT(ret == 0, "Error in sem_init"); + + ret = sem_init(&test_stage_completed, 0, 0); + TEST_ASSERT(ret == 0, "Error in sem_init"); + + current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage)); + *current_stage = NUM_TEST_STAGES; + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_info("Testing memory backing src type: %s\n", + vm_mem_backing_src_alias(src_type)->name); + pr_info("Testing memory backing src granularity: 0x%lx\n", + large_page_size); + pr_info("Testing memory size(aligned): 0x%lx\n", test_mem_size); + pr_info("Guest physical test memory offset: 0x%lx\n", + guest_test_phys_mem); + pr_info("Host virtual test memory offset: 0x%lx\n", + (uint64_t)host_test_mem); + pr_info("Number of testing vCPUs: %d\n", nr_vcpus); + + return vm; +} + +static void vcpus_complete_new_stage(enum test_stage stage) +{ + int ret; + int vcpus; + + /* Wake up all the vcpus to run new test stage */ + for (vcpus = 0; vcpus < nr_vcpus; vcpus++) { + ret = sem_post(&test_stage_updated); + TEST_ASSERT(ret == 0, "Error in sem_post"); + } + pr_debug("All vcpus have been notified to continue\n"); + + /* Wait for all the vcpus to complete new test stage */ + for (vcpus = 0; vcpus < nr_vcpus; vcpus++) { + ret = sem_wait(&test_stage_completed); + TEST_ASSERT(ret == 0, "Error in sem_wait"); + + pr_debug("%d vcpus have completed stage %s\n", + vcpus + 1, test_stage_string[stage]); + } + + pr_debug("All vcpus have completed stage %s\n", + test_stage_string[stage]); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + int ret; + pthread_t *vcpu_threads; + struct kvm_vm *vm; + int vcpu_id; + struct timespec start; + struct timespec ts_diff; + + /* Create VM with vCPUs and make some pre-initialization */ + vm = pre_init_before_test(mode, arg); + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + host_quit = false; + *current_stage = KVM_BEFORE_MAPPINGS; + + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &test_args.vcpu_args[vcpu_id]); + } + + vcpus_complete_new_stage(*current_stage); + pr_info("Started all vCPUs successfully\n"); + + /* Test the stage of KVM creating mappings */ + *current_stage = KVM_CREATE_MAPPINGS; + + clock_gettime(CLOCK_MONOTONIC_RAW, &start); + vcpus_complete_new_stage(*current_stage); + ts_diff = timespec_elapsed(start); + + pr_info("KVM_CREATE_MAPPINGS: total execution time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Test the stage of KVM updating mappings */ + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + KVM_MEM_LOG_DIRTY_PAGES); + + *current_stage = KVM_UPDATE_MAPPINGS; + + clock_gettime(CLOCK_MONOTONIC_RAW, &start); + vcpus_complete_new_stage(*current_stage); + ts_diff = timespec_elapsed(start); + + pr_info("KVM_UPDATE_MAPPINGS: total execution time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Test the stage of KVM adjusting mappings */ + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + + *current_stage = KVM_ADJUST_MAPPINGS; + + clock_gettime(CLOCK_MONOTONIC_RAW, &start); + vcpus_complete_new_stage(*current_stage); + ts_diff = timespec_elapsed(start); + + pr_info("KVM_ADJUST_MAPPINGS: total execution time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + ret = sem_post(&test_stage_updated); + TEST_ASSERT(ret == 0, "Error in sem_post"); + } + + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + ret = sem_destroy(&test_stage_updated); + TEST_ASSERT(ret == 0, "Error in sem_destroy"); + + ret = sem_destroy(&test_stage_completed); + TEST_ASSERT(ret == 0, "Error in sem_destroy"); + + free(vcpu_threads); + ucall_uninit(vm); + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-p offset] [-m mode] " + "[-b mem-size] [-v vcpus] [-s mem-type]\n", name); + puts(""); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + guest_modes_help(); + printf(" -b: specify size of the memory region for testing. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -v: specify the number of vCPUs to run\n" + " (default: 1)\n"); + printf(" -s: specify the type of memory that should be used to\n" + " back the guest data region.\n" + " (default: anonymous)\n\n"); + backing_src_help(); + puts(""); +} + +int main(int argc, char *argv[]) +{ + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + struct test_params p = { + .test_mem_size = DEFAULT_TEST_MEM_SIZE, + .src_type = VM_MEM_SRC_ANONYMOUS, + }; + int opt; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) { + switch (opt) { + case 'p': + p.phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + guest_modes_cmdline(optarg); + break; + case 'b': + p.test_mem_size = parse_size(optarg); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + break; + case 's': + p.src_type = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + exit(0); + } + } + + for_each_guest_mode(run_test, &p); + + return 0; +} diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 5ebbd0d6b472..71ade6100fd3 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -71,9 +71,9 @@ test_assert(bool exp, const char *exp_str, fprintf(stderr, "==== Test Assertion Failure ====\n" " %s:%u: %s\n" - " pid=%d tid=%d - %s\n", + " pid=%d tid=%d errno=%d - %s\n", file, line, exp_str, getpid(), _gettid(), - strerror(errno)); + errno, strerror(errno)); test_dump_stack(); if (fmt) { fputs(" ", stderr); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e5fbf16f725b..fc83f6c5902d 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -18,7 +18,6 @@ #include <unistd.h> #include <linux/kernel.h> -#define KVM_UTIL_PGS_PER_HUGEPG 512 #define KVM_UTIL_MIN_PFN 2 static int vcpu_mmap_sz(void); @@ -143,17 +142,24 @@ static void vm_open(struct kvm_vm *vm, int perm) "rc: %i errno: %i", vm->fd, errno); } -const char * const vm_guest_mode_string[] = { - "PA-bits:52, VA-bits:48, 4K pages", - "PA-bits:52, VA-bits:48, 64K pages", - "PA-bits:48, VA-bits:48, 4K pages", - "PA-bits:48, VA-bits:48, 64K pages", - "PA-bits:40, VA-bits:48, 4K pages", - "PA-bits:40, VA-bits:48, 64K pages", - "PA-bits:ANY, VA-bits:48, 4K pages", -}; -_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, - "Missing new mode strings?"); +const char *vm_guest_mode_string(uint32_t i) +{ + static const char * const strings[] = { + [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", + [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", + [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", + [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", + [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", + [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", + [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + }; + _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, + "Missing new mode strings?"); + + TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i); + + return strings[i]; +} const struct vm_guest_mode_params vm_guest_mode_params[] = { { 52, 48, 0x1000, 12 }, @@ -514,7 +520,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu) ret = munmap(vcpu->state, vcpu_mmap_sz()); TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " "errno: %i", ret, errno); - close(vcpu->fd); + ret = close(vcpu->fd); TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " "errno: %i", ret, errno); @@ -534,7 +540,7 @@ void kvm_vm_release(struct kvm_vm *vmp) TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); - close(vmp->kvm_fd); + ret = close(vmp->kvm_fd); TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n" " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno); } @@ -681,7 +687,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, { int ret; struct userspace_mem_region *region; - size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; + size_t backing_src_pagesz = get_backing_src_pagesz(src_type); size_t alignment; TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, @@ -743,7 +749,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, #endif if (src_type == VM_MEM_SRC_ANONYMOUS_THP) - alignment = max(huge_page_size, alignment); + alignment = max(backing_src_pagesz, alignment); /* Add enough memory to align up if necessary */ if (alignment > 1) @@ -752,7 +758,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->mmap_start = mmap(NULL, region->mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS - | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0), + | vm_mem_backing_src_alias(src_type)->flag, -1, 0); TEST_ASSERT(region->mmap_start != MAP_FAILED, "test_malloc failed, mmap_start: %p errno: %i", @@ -762,22 +768,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->host_mem = align(region->mmap_start, alignment); /* As needed perform madvise */ - if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { - struct stat statbuf; - - ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); - TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), - "stat /sys/kernel/mm/transparent_hugepage"); - - TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP, - "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel"); - - if (ret == 0) { - ret = madvise(region->host_mem, npages * vm->page_size, - src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); - TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x", - region->host_mem, npages * vm->page_size, src_type); - } + if ((src_type == VM_MEM_SRC_ANONYMOUS || + src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { + ret = madvise(region->host_mem, npages * vm->page_size, + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); + TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", + region->host_mem, npages * vm->page_size, + vm_mem_backing_src_alias(src_type)->name); } region->unused_phy_pages = sparsebit_alloc(); @@ -1697,11 +1694,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) { int ret; - ret = ioctl(vm->fd, cmd, arg); + ret = _vm_ioctl(vm, cmd, arg); TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", cmd, ret, errno, strerror(errno)); } +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + return ioctl(vm->fd, cmd, arg); +} + /* * KVM system ioctl * @@ -1729,6 +1731,81 @@ int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) } /* + * Device Ioctl + */ + +int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + struct kvm_device_attr attribute = { + .group = group, + .attr = attr, + .flags = 0, + }; + + return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); +} + +int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + int ret = _kvm_device_check_attr(dev_fd, group, attr); + + TEST_ASSERT(ret >= 0, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); + return ret; +} + +int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd) +{ + struct kvm_create_device create_dev; + int ret; + + create_dev.type = type; + create_dev.fd = -1; + create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0; + ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev); + *fd = create_dev.fd; + return ret; +} + +int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test) +{ + int fd, ret; + + ret = _kvm_create_device(vm, type, test, &fd); + + if (!test) { + TEST_ASSERT(ret >= 0, + "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno); + return fd; + } + return ret; +} + +int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write) +{ + struct kvm_device_attr kvmattr = { + .group = group, + .attr = attr, + .flags = 0, + .addr = (uintptr_t)val, + }; + int ret; + + ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR, + &kvmattr); + return ret; +} + +int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, + void *val, bool write) +{ + int ret = _kvm_device_access(dev_fd, group, attr, val, write); + + TEST_ASSERT(ret >= 0, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); + return ret; +} + +/* * VM Dump * * Input Args: diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 34465dc562d8..91ce1b5d480b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -10,8 +10,6 @@ #include "sparsebit.h" -#define KVM_DEV_PATH "/dev/kvm" - struct userspace_mem_region { struct kvm_userspace_memory_region region; struct sparsebit *unused_phy_pages; diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 906c955384e2..63d2bc7d757b 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -10,6 +10,8 @@ #include <limits.h> #include <stdlib.h> #include <time.h> +#include <sys/stat.h> +#include <linux/mman.h> #include "linux/kernel.h" #include "test_util.h" @@ -111,28 +113,169 @@ void print_skip(const char *fmt, ...) puts(", skipping test"); } -const struct vm_mem_backing_src_alias backing_src_aliases[] = { - {"anonymous", VM_MEM_SRC_ANONYMOUS,}, - {"anonymous_thp", VM_MEM_SRC_ANONYMOUS_THP,}, - {"anonymous_hugetlb", VM_MEM_SRC_ANONYMOUS_HUGETLB,}, -}; +bool thp_configured(void) +{ + int ret; + struct stat statbuf; + + ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), + "Error in stating /sys/kernel/mm/transparent_hugepage"); + + return ret == 0; +} + +size_t get_trans_hugepagesz(void) +{ + size_t size; + FILE *f; + + TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); + + f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); + TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); + + fscanf(f, "%ld", &size); + fclose(f); + + return size; +} + +size_t get_def_hugetlb_pagesz(void) +{ + char buf[64]; + const char *tag = "Hugepagesize:"; + FILE *f; + + f = fopen("/proc/meminfo", "r"); + TEST_ASSERT(f != NULL, "Error in opening /proc/meminfo"); + + while (fgets(buf, sizeof(buf), f) != NULL) { + if (strstr(buf, tag) == buf) { + fclose(f); + return strtoull(buf + strlen(tag), NULL, 10) << 10; + } + } + + if (feof(f)) + TEST_FAIL("HUGETLB is not configured in host kernel"); + else + TEST_FAIL("Error in reading /proc/meminfo"); + + fclose(f); + return 0; +} + +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) +{ + static const struct vm_mem_backing_src_alias aliases[] = { + [VM_MEM_SRC_ANONYMOUS] = { + .name = "anonymous", + .flag = 0, + }, + [VM_MEM_SRC_ANONYMOUS_THP] = { + .name = "anonymous_thp", + .flag = 0, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB] = { + .name = "anonymous_hugetlb", + .flag = MAP_HUGETLB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = { + .name = "anonymous_hugetlb_16kb", + .flag = MAP_HUGETLB | MAP_HUGE_16KB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = { + .name = "anonymous_hugetlb_64kb", + .flag = MAP_HUGETLB | MAP_HUGE_64KB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = { + .name = "anonymous_hugetlb_512kb", + .flag = MAP_HUGETLB | MAP_HUGE_512KB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = { + .name = "anonymous_hugetlb_1mb", + .flag = MAP_HUGETLB | MAP_HUGE_1MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = { + .name = "anonymous_hugetlb_2mb", + .flag = MAP_HUGETLB | MAP_HUGE_2MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = { + .name = "anonymous_hugetlb_8mb", + .flag = MAP_HUGETLB | MAP_HUGE_8MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = { + .name = "anonymous_hugetlb_16mb", + .flag = MAP_HUGETLB | MAP_HUGE_16MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = { + .name = "anonymous_hugetlb_32mb", + .flag = MAP_HUGETLB | MAP_HUGE_32MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = { + .name = "anonymous_hugetlb_256mb", + .flag = MAP_HUGETLB | MAP_HUGE_256MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = { + .name = "anonymous_hugetlb_512mb", + .flag = MAP_HUGETLB | MAP_HUGE_512MB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = { + .name = "anonymous_hugetlb_1gb", + .flag = MAP_HUGETLB | MAP_HUGE_1GB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = { + .name = "anonymous_hugetlb_2gb", + .flag = MAP_HUGETLB | MAP_HUGE_2GB, + }, + [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = { + .name = "anonymous_hugetlb_16gb", + .flag = MAP_HUGETLB | MAP_HUGE_16GB, + }, + }; + _Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES, + "Missing new backing src types?"); + + TEST_ASSERT(i < NUM_SRC_TYPES, "Backing src type ID %d too big", i); + + return &aliases[i]; +} + +#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK)) + +size_t get_backing_src_pagesz(uint32_t i) +{ + uint32_t flag = vm_mem_backing_src_alias(i)->flag; + + switch (i) { + case VM_MEM_SRC_ANONYMOUS: + return getpagesize(); + case VM_MEM_SRC_ANONYMOUS_THP: + return get_trans_hugepagesz(); + case VM_MEM_SRC_ANONYMOUS_HUGETLB: + return get_def_hugetlb_pagesz(); + default: + return MAP_HUGE_PAGE_SIZE(flag); + } +} void backing_src_help(void) { int i; printf("Available backing src types:\n"); - for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++) - printf("\t%s\n", backing_src_aliases[i].name); + for (i = 0; i < NUM_SRC_TYPES; i++) + printf("\t%s\n", vm_mem_backing_src_alias(i)->name); } enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name) { int i; - for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++) - if (!strcmp(type_name, backing_src_aliases[i].name)) - return backing_src_aliases[i].type; + for (i = 0; i < NUM_SRC_TYPES; i++) + if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name)) + return i; backing_src_help(); TEST_FAIL("Unknown backing src type: %s", type_name); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index f127ed31dba7..978f5b5f4dc0 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -329,6 +329,22 @@ static void test_zero_memory_regions(void) } #endif /* __x86_64__ */ +static int test_memory_region_add(struct kvm_vm *vm, void *mem, uint32_t slot, + uint32_t size, uint64_t guest_addr) +{ + struct kvm_userspace_memory_region region; + int ret; + + region.slot = slot; + region.flags = 0; + region.guest_phys_addr = guest_addr; + region.memory_size = size; + region.userspace_addr = (uintptr_t) mem; + ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION, ®ion); + + return ret; +} + /* * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any * tentative to add further slots should fail. @@ -339,9 +355,15 @@ static void test_add_max_memory_regions(void) struct kvm_vm *vm; uint32_t max_mem_slots; uint32_t slot; - uint64_t guest_addr = 0x0; - uint64_t mem_reg_npages; - void *mem; + void *mem, *mem_aligned, *mem_extra; + size_t alignment; + +#ifdef __s390x__ + /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ + alignment = 0x100000; +#else + alignment = 1; +#endif max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); TEST_ASSERT(max_mem_slots > 0, @@ -350,30 +372,37 @@ static void test_add_max_memory_regions(void) vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); - mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE); - /* Check it can be added memory slots up to the maximum allowed */ pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); + + mem = mmap(NULL, MEM_REGION_SIZE * max_mem_slots + alignment, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); + for (slot = 0; slot < max_mem_slots; slot++) { - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_addr, slot, mem_reg_npages, - 0); - guest_addr += MEM_REGION_SIZE; + ret = test_memory_region_add(vm, mem_aligned + + ((uint64_t)slot * MEM_REGION_SIZE), + slot, MEM_REGION_SIZE, + (uint64_t)slot * MEM_REGION_SIZE); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + " rc: %i errno: %i slot: %i\n", + ret, errno, slot); } /* Check it cannot be added memory slots beyond the limit */ - mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); - ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION, - &(struct kvm_userspace_memory_region) {slot, 0, guest_addr, - MEM_REGION_SIZE, (uint64_t) mem}); + ret = test_memory_region_add(vm, mem_extra, max_mem_slots, MEM_REGION_SIZE, + (uint64_t)max_mem_slots * MEM_REGION_SIZE); TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, MEM_REGION_SIZE); + munmap(mem, MEM_REGION_SIZE * max_mem_slots + alignment); + munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c new file mode 100644 index 000000000000..cb953df4d7d0 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_GET_MSR_INDEX_LIST and + * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static int kvm_num_index_msrs(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + TEST_ASSERT(r == -1 && errno == E2BIG, + "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", + r); + + r = list->nmsrs; + free(list); + return r; +} + +static void test_get_msr_index(void) +{ + int old_res, res, kvm_fd, r; + struct kvm_msr_list *list; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + old_res = kvm_num_index_msrs(kvm_fd, 0); + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); + + if (old_res != 1) { + res = kvm_num_index_msrs(kvm_fd, 1); + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); + } + + list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0])); + list->nmsrs = old_res; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + + TEST_ASSERT(r == 0, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", + r); + TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical"); + free(list); + + close(kvm_fd); +} + +static int kvm_num_feature_msrs(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + TEST_ASSERT(r == -1 && errno == E2BIG, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i", + r); + + r = list->nmsrs; + free(list); + return r; +} + +struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + + TEST_ASSERT(r == 0, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", + r); + + return list; +} + +static void test_get_msr_feature(void) +{ + int res, old_res, i, kvm_fd; + struct kvm_msr_list *feature_list; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + old_res = kvm_num_feature_msrs(kvm_fd, 0); + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); + + if (old_res != 1) { + res = kvm_num_feature_msrs(kvm_fd, 1); + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); + } + + feature_list = kvm_get_msr_feature_list(kvm_fd, old_res); + TEST_ASSERT(old_res == feature_list->nmsrs, + "Unmatching number of msr indexes"); + + for (i = 0; i < feature_list->nmsrs; i++) + kvm_get_feature_msr(feature_list->indices[i]); + + free(feature_list); + close(kvm_fd); +} + +int main(int argc, char *argv[]) +{ + if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES)) + test_get_msr_feature(); + + test_get_msr_index(); +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c new file mode 100644 index 000000000000..7f1d2765572c --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Red Hat, Inc. + * + * Tests for Hyper-V clocksources + */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; +} __packed; + +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 +#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 + +/* Simplified mul_u64_u64_shr() */ +static inline u64 mul_u64_u64_shr64(u64 a, u64 b) +{ + union { + u64 ll; + struct { + u32 low, high; + } l; + } rm, rn, rh, a0, b0; + u64 c; + + a0.ll = a; + b0.ll = b; + + rm.ll = (u64)a0.l.low * b0.l.high; + rn.ll = (u64)a0.l.high * b0.l.low; + rh.ll = (u64)a0.l.high * b0.l.high; + + rh.l.low = c = rm.l.high + rn.l.high + rh.l.low; + rh.l.high = (c >> 32) + rh.l.high; + + return rh.ll; +} + +static inline void nop_loop(void) +{ + int i; + + for (i = 0; i < 1000000; i++) + asm volatile("nop"); +} + +static inline void check_tsc_msr_rdtsc(void) +{ + u64 tsc_freq, r1, r2, t1, t2; + s64 delta_ns; + + tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY); + GUEST_ASSERT(tsc_freq > 0); + + /* First, check MSR-based clocksource */ + r1 = rdtsc(); + t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + nop_loop(); + r2 = rdtsc(); + t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + + GUEST_ASSERT(r2 > r1 && t2 > t1); + + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); + if (delta_ns < 0) + delta_ns = -delta_ns; + + /* 1% tolerance */ + GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100); +} + +static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page) +{ + return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset; +} + +static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page) +{ + u64 r1, r2, t1, t2; + + /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */ + t1 = get_tscpage_ts(tsc_page); + r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + + /* 10 ms tolerance */ + GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000); + nop_loop(); + + t2 = get_tscpage_ts(tsc_page); + r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000); +} + +static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa) +{ + u64 tsc_scale, tsc_offset; + + /* Set Guest OS id to enable Hyper-V emulation */ + GUEST_SYNC(1); + wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48); + GUEST_SYNC(2); + + check_tsc_msr_rdtsc(); + + GUEST_SYNC(3); + + /* Set up TSC page is disabled state, check that it's clean */ + wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa); + GUEST_ASSERT(tsc_page->tsc_sequence == 0); + GUEST_ASSERT(tsc_page->tsc_scale == 0); + GUEST_ASSERT(tsc_page->tsc_offset == 0); + + GUEST_SYNC(4); + + /* Set up TSC page is enabled state */ + wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1); + GUEST_ASSERT(tsc_page->tsc_sequence != 0); + + GUEST_SYNC(5); + + check_tsc_msr_tsc_page(tsc_page); + + GUEST_SYNC(6); + + tsc_offset = tsc_page->tsc_offset; + /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */ + + GUEST_SYNC(7); + /* Sanity check TSC page timestamp, it should be close to 0 */ + GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000); + + GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset); + + nop_loop(); + + /* + * Enable Re-enlightenment and check that TSC page stays constant across + * KVM_SET_CLOCK. + */ + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff); + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1); + tsc_offset = tsc_page->tsc_offset; + tsc_scale = tsc_page->tsc_scale; + GUEST_SYNC(8); + GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset); + GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale); + + GUEST_SYNC(9); + + check_tsc_msr_tsc_page(tsc_page); + + /* + * Disable re-enlightenment and TSC page, check that KVM doesn't update + * it anymore. + */ + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0); + wrmsr(HV_X64_MSR_REFERENCE_TSC, 0); + memset(tsc_page, 0, sizeof(*tsc_page)); + + GUEST_SYNC(10); + GUEST_ASSERT(tsc_page->tsc_sequence == 0); + GUEST_ASSERT(tsc_page->tsc_offset == 0); + GUEST_ASSERT(tsc_page->tsc_scale == 0); + + GUEST_DONE(); +} + +#define VCPU_ID 0 + +static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm) +{ + u64 tsc_freq, r1, r2, t1, t2; + s64 delta_ns; + + tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY); + TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); + + /* First, check MSR-based clocksource */ + r1 = rdtsc(); + t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + nop_loop(); + r2 = rdtsc(); + t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + + TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); + + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); + if (delta_ns < 0) + delta_ns = -delta_ns; + + /* 1% tolerance */ + TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100, + "Elapsed time does not match (MSR=%ld, TSC=%ld)", + (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq); +} + +int main(void) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + vm_vaddr_t tsc_page_gva; + int stage; + + vm = vm_create_default(VCPU_ID, 0, guest_main); + run = vcpu_state(vm, VCPU_ID); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, + "TSC page has to be page aligned\n"); + vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); + + host_check_tsc_msr_rdtsc(vm); + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + /* Keep in sync with guest_main() */ + TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n", + stage); + goto out; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, + "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + + /* Reset kvmclock triggering TSC page update */ + if (stage == 7 || stage == 8 || stage == 10) { + struct kvm_clock_data clock = {0}; + + vm_ioctl(vm, KVM_SET_CLOCK, &clock); + } + } + +out: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c new file mode 100644 index 000000000000..12c558fc8074 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_SET_BOOT_CPU_ID works as intended + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#define _GNU_SOURCE /* for program_invocation_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define N_VCPU 2 +#define VCPU_ID0 0 +#define VCPU_ID1 1 + +static uint32_t get_bsp_flag(void) +{ + return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; +} + +static void guest_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT(get_bsp_flag() != 0); + + GUEST_DONE(); +} + +static void guest_not_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT(get_bsp_flag() == 0); + + GUEST_DONE(); +} + +static void test_set_boot_busy(struct kvm_vm *vm) +{ + int res; + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0); + TEST_ASSERT(res == -1 && errno == EBUSY, + "KVM_SET_BOOT_CPU_ID set while running vm"); +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct ucall uc; + int stage; + + for (stage = 0; stage < 2; stage++) { + + vcpu_run(vm, vcpuid); + + switch (get_ucall(vm, vcpuid, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, + "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + test_set_boot_busy(vm); + break; + case UCALL_DONE: + TEST_ASSERT(stage == 1, + "Expected GUEST_DONE in stage 2, got stage %d", + stage); + break; + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", + (const char *)uc.args[0], __FILE__, + uc.args[1], uc.args[2], uc.args[3]); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + } + } +} + +static struct kvm_vm *create_vm(void) +{ + struct kvm_vm *vm; + uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2; + uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; + + pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages); + vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_create_irqchip(vm); + + return vm; +} + +static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code) +{ + if (bsp_code) + vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu); + else + vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu); + + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); +} + +static void run_vm_bsp(uint32_t bsp_vcpu) +{ + struct kvm_vm *vm; + bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1; + + vm = create_vm(); + + if (is_bsp_vcpu1) + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + + add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1); + add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1); + + run_vcpu(vm, VCPU_ID0); + run_vcpu(vm, VCPU_ID1); + + kvm_vm_free(vm); +} + +static void check_set_bsp_busy(void) +{ + struct kvm_vm *vm; + int res; + + vm = create_vm(); + + add_x86_vcpu(vm, VCPU_ID0, true); + add_x86_vcpu(vm, VCPU_ID1, false); + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu"); + + run_vcpu(vm, VCPU_ID0); + run_vcpu(vm, VCPU_ID1); + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu"); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + print_skip("set_boot_cpu_id not available"); + return 0; + } + + run_vm_bsp(VCPU_ID0); + run_vm_bsp(VCPU_ID1); + run_vm_bsp(VCPU_ID0); + + check_set_bsp_busy(); +} diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 804ff5ff022d..1f4a0599683c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -186,7 +186,7 @@ int main(int argc, char *argv[]) vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); } - struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);; + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); rs->state = 0x5a; for (;;) { diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore new file mode 100644 index 000000000000..470203a7cd73 --- /dev/null +++ b/tools/testing/selftests/landlock/.gitignore @@ -0,0 +1,2 @@ +/*_test +/true diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile new file mode 100644 index 000000000000..a99596ca9882 --- /dev/null +++ b/tools/testing/selftests/landlock/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -Wall -O2 + +src_test := $(wildcard *_test.c) + +TEST_GEN_PROGS := $(src_test:.c=) + +TEST_GEN_PROGS_EXTENDED := true + +KSFT_KHDR_INSTALL := 1 +OVERRIDE_TARGETS := 1 +include ../lib.mk + +khdr_dir = $(top_srcdir)/usr/include + +$(khdr_dir)/linux/landlock.h: khdr + @: + +$(OUTPUT)/true: true.c + $(LINK.c) $< $(LDLIBS) -o $@ -static + +$(OUTPUT)/%_test: %_test.c $(khdr_dir)/linux/landlock.h ../kselftest_harness.h common.h + $(LINK.c) $< $(LDLIBS) -o $@ -lcap -I$(khdr_dir) diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c new file mode 100644 index 000000000000..ca40abe9daa8 --- /dev/null +++ b/tools/testing/selftests/landlock/base_test.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock tests - Common user space base + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2019-2020 ANSSI + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/landlock.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "common.h" + +#ifndef O_PATH +#define O_PATH 010000000 +#endif + +TEST(inconsistent_attr) { + const long page_size = sysconf(_SC_PAGESIZE); + char *const buf = malloc(page_size + 1); + struct landlock_ruleset_attr *const ruleset_attr = (void *)buf; + + ASSERT_NE(NULL, buf); + + /* Checks copy_from_user(). */ + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 0, 0)); + /* The size if less than sizeof(struct landlock_attr_enforce). */ + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0)); + /* The size if less than sizeof(struct landlock_attr_enforce). */ + ASSERT_EQ(EFAULT, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, + sizeof(struct landlock_ruleset_attr), 0)); + ASSERT_EQ(EFAULT, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0)); + ASSERT_EQ(E2BIG, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, + sizeof(struct landlock_ruleset_attr), 0)); + ASSERT_EQ(ENOMSG, errno); + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0)); + ASSERT_EQ(ENOMSG, errno); + + /* Checks non-zero value. */ + buf[page_size - 2] = '.'; + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0)); + ASSERT_EQ(E2BIG, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0)); + ASSERT_EQ(E2BIG, errno); + + free(buf); +} + +TEST(abi_version) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + ASSERT_EQ(1, landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION)); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, + LANDLOCK_CREATE_RULESET_VERSION)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_VERSION)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_VERSION)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION | 1 << 31)); + ASSERT_EQ(EINVAL, errno); +} + +TEST(inval_create_ruleset_flags) { + const int last_flag = LANDLOCK_CREATE_RULESET_VERSION; + const int invalid_flag = last_flag << 1; + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, invalid_flag)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), + invalid_flag)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), invalid_flag)); + ASSERT_EQ(EINVAL, errno); +} + +TEST(empty_path_beneath_attr) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE, + }; + const int ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + + ASSERT_LE(0, ruleset_fd); + + /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */ + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + NULL, 0)); + ASSERT_EQ(EFAULT, errno); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST(inval_fd_enforce) { + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + ASSERT_EQ(-1, landlock_restrict_self(-1, 0)); + ASSERT_EQ(EBADF, errno); +} + +TEST(unpriv_enforce_without_no_new_privs) { + int err; + + drop_caps(_metadata); + err = landlock_restrict_self(-1, 0); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(err, -1); +} + +TEST(ruleset_fd_io) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + int ruleset_fd; + char buf; + + drop_caps(_metadata); + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(-1, write(ruleset_fd, ".", 1)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(-1, read(ruleset_fd, &buf, 1)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(0, close(ruleset_fd)); +} + +/* Tests enforcement of a ruleset FD transferred through a UNIX socket. */ +TEST(ruleset_fd_transfer) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, + }; + struct landlock_path_beneath_attr path_beneath_attr = { + .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR, + }; + int ruleset_fd_tx, dir_fd; + union { + /* Aligned ancillary data buffer. */ + char buf[CMSG_SPACE(sizeof(ruleset_fd_tx))]; + struct cmsghdr _align; + } cmsg_tx = {}; + char data_tx = '.'; + struct iovec io = { + .iov_base = &data_tx, + .iov_len = sizeof(data_tx), + }; + struct msghdr msg = { + .msg_iov = &io, + .msg_iovlen = 1, + .msg_control = &cmsg_tx.buf, + .msg_controllen = sizeof(cmsg_tx.buf), + }; + struct cmsghdr *cmsg; + int socket_fds[2]; + pid_t child; + int status; + + drop_caps(_metadata); + + /* Creates a test ruleset with a simple rule. */ + ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd_tx); + path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW | + O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath_attr.parent_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath_attr, 0)); + ASSERT_EQ(0, close(path_beneath_attr.parent_fd)); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + cmsg->cmsg_len = CMSG_LEN(sizeof(ruleset_fd_tx)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx)); + + /* Sends the ruleset FD over a socketpair and then close it. */ + ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds)); + ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0)); + ASSERT_EQ(0, close(socket_fds[0])); + ASSERT_EQ(0, close(ruleset_fd_tx)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + int ruleset_fd_rx; + + *(char *)msg.msg_iov->iov_base = '\0'; + ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC)); + ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base); + ASSERT_EQ(0, close(socket_fds[1])); + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(ruleset_fd_tx))); + memcpy(&ruleset_fd_rx, CMSG_DATA(cmsg), sizeof(ruleset_fd_tx)); + + /* Enforces the received ruleset on the child. */ + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd_rx, 0)); + ASSERT_EQ(0, close(ruleset_fd_rx)); + + /* Checks that the ruleset enforcement. */ + ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + ASSERT_EQ(EACCES, errno); + dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, dir_fd); + ASSERT_EQ(0, close(dir_fd)); + _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + return; + } + + ASSERT_EQ(0, close(socket_fds[1])); + + /* Checks that the parent is unrestricted. */ + dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, dir_fd); + ASSERT_EQ(0, close(dir_fd)); + dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, dir_fd); + ASSERT_EQ(0, close(dir_fd)); + + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFEXITED(status)); + ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h new file mode 100644 index 000000000000..20e2a9286d71 --- /dev/null +++ b/tools/testing/selftests/landlock/common.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Landlock test helpers + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2019-2020 ANSSI + * Copyright © 2021 Microsoft Corporation + */ + +#include <errno.h> +#include <linux/landlock.h> +#include <sys/capability.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +/* + * TEST_F_FORK() is useful when a test drop privileges but the corresponding + * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory + * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is + * also called when the test failed, but not when FIXTURE_SETUP() failed. For + * this to be possible, we must not call abort() but instead exit smoothly + * (hence the step print). + */ +#define TEST_F_FORK(fixture_name, test_name) \ + static void fixture_name##_##test_name##_child( \ + struct __test_metadata *_metadata, \ + FIXTURE_DATA(fixture_name) *self, \ + const FIXTURE_VARIANT(fixture_name) *variant); \ + TEST_F(fixture_name, test_name) \ + { \ + int status; \ + const pid_t child = fork(); \ + if (child < 0) \ + abort(); \ + if (child == 0) { \ + _metadata->no_print = 1; \ + fixture_name##_##test_name##_child(_metadata, self, variant); \ + if (_metadata->skip) \ + _exit(255); \ + if (_metadata->passed) \ + _exit(0); \ + _exit(_metadata->step); \ + } \ + if (child != waitpid(child, &status, 0)) \ + abort(); \ + if (WIFSIGNALED(status) || !WIFEXITED(status)) { \ + _metadata->passed = 0; \ + _metadata->step = 1; \ + return; \ + } \ + switch (WEXITSTATUS(status)) { \ + case 0: \ + _metadata->passed = 1; \ + break; \ + case 255: \ + _metadata->passed = 1; \ + _metadata->skip = 1; \ + break; \ + default: \ + _metadata->passed = 0; \ + _metadata->step = WEXITSTATUS(status); \ + break; \ + } \ + } \ + static void fixture_name##_##test_name##_child( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + __attribute__((unused)) *variant) + +#ifndef landlock_create_ruleset +static inline int landlock_create_ruleset( + const struct landlock_ruleset_attr *const attr, + const size_t size, const __u32 flags) +{ + return syscall(__NR_landlock_create_ruleset, attr, size, flags); +} +#endif + +#ifndef landlock_add_rule +static inline int landlock_add_rule(const int ruleset_fd, + const enum landlock_rule_type rule_type, + const void *const rule_attr, const __u32 flags) +{ + return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, + rule_attr, flags); +} +#endif + +#ifndef landlock_restrict_self +static inline int landlock_restrict_self(const int ruleset_fd, + const __u32 flags) +{ + return syscall(__NR_landlock_restrict_self, ruleset_fd, flags); +} +#endif + +static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) +{ + cap_t cap_p; + /* Only these three capabilities are useful for the tests. */ + const cap_value_t caps[] = { + CAP_DAC_OVERRIDE, + CAP_MKNOD, + CAP_SYS_ADMIN, + CAP_SYS_CHROOT, + }; + + cap_p = cap_get_proc(); + EXPECT_NE(NULL, cap_p) { + TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_clear(cap_p)) { + TH_LOG("Failed to cap_clear: %s", strerror(errno)); + } + if (!drop_all) { + EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED, + ARRAY_SIZE(caps), caps, CAP_SET)) { + TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); + } + } + EXPECT_NE(-1, cap_set_proc(cap_p)) { + TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_free(cap_p)) { + TH_LOG("Failed to cap_free: %s", strerror(errno)); + } +} + +/* We cannot put such helpers in a library because of kselftest_harness.h . */ +__attribute__((__unused__)) +static void disable_caps(struct __test_metadata *const _metadata) +{ + _init_caps(_metadata, false); +} + +__attribute__((__unused__)) +static void drop_caps(struct __test_metadata *const _metadata) +{ + _init_caps(_metadata, true); +} + +static void _effective_cap(struct __test_metadata *const _metadata, + const cap_value_t caps, const cap_flag_value_t value) +{ + cap_t cap_p; + + cap_p = cap_get_proc(); + EXPECT_NE(NULL, cap_p) { + TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) { + TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_set_proc(cap_p)) { + TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); + } + EXPECT_NE(-1, cap_free(cap_p)) { + TH_LOG("Failed to cap_free: %s", strerror(errno)); + } +} + +__attribute__((__unused__)) +static void set_cap(struct __test_metadata *const _metadata, + const cap_value_t caps) +{ + _effective_cap(_metadata, caps, CAP_SET); +} + +__attribute__((__unused__)) +static void clear_cap(struct __test_metadata *const _metadata, + const cap_value_t caps) +{ + _effective_cap(_metadata, caps, CAP_CLEAR); +} diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config new file mode 100644 index 000000000000..0f0a65287bac --- /dev/null +++ b/tools/testing/selftests/landlock/config @@ -0,0 +1,7 @@ +CONFIG_OVERLAY_FS=y +CONFIG_SECURITY_LANDLOCK=y +CONFIG_SECURITY_PATH=y +CONFIG_SECURITY=y +CONFIG_SHMEM=y +CONFIG_TMPFS_XATTR=y +CONFIG_TMPFS=y diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c new file mode 100644 index 000000000000..10c9a1e4ebd9 --- /dev/null +++ b/tools/testing/selftests/landlock/fs_test.c @@ -0,0 +1,2791 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock tests - Filesystem + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2020 ANSSI + * Copyright © 2020-2021 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <linux/landlock.h> +#include <sched.h> +#include <string.h> +#include <sys/capability.h> +#include <sys/mount.h> +#include <sys/prctl.h> +#include <sys/sendfile.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <unistd.h> + +#include "common.h" + +#define TMP_DIR "tmp" +#define BINARY_PATH "./true" + +/* Paths (sibling number and depth) */ +static const char dir_s1d1[] = TMP_DIR "/s1d1"; +static const char file1_s1d1[] = TMP_DIR "/s1d1/f1"; +static const char file2_s1d1[] = TMP_DIR "/s1d1/f2"; +static const char dir_s1d2[] = TMP_DIR "/s1d1/s1d2"; +static const char file1_s1d2[] = TMP_DIR "/s1d1/s1d2/f1"; +static const char file2_s1d2[] = TMP_DIR "/s1d1/s1d2/f2"; +static const char dir_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3"; +static const char file1_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f1"; +static const char file2_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f2"; + +static const char dir_s2d1[] = TMP_DIR "/s2d1"; +static const char file1_s2d1[] = TMP_DIR "/s2d1/f1"; +static const char dir_s2d2[] = TMP_DIR "/s2d1/s2d2"; +static const char file1_s2d2[] = TMP_DIR "/s2d1/s2d2/f1"; +static const char dir_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3"; +static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1"; +static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2"; + +static const char dir_s3d1[] = TMP_DIR "/s3d1"; +/* dir_s3d2 is a mount point. */ +static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2"; +static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; + +/* + * layout1 hierarchy: + * + * tmp + * ├── s1d1 + * │  ├── f1 + * │  ├── f2 + * │  └── s1d2 + * │  ├── f1 + * │  ├── f2 + * │  └── s1d3 + * │  ├── f1 + * │  └── f2 + * ├── s2d1 + * │  ├── f1 + * │  └── s2d2 + * │  ├── f1 + * │  └── s2d3 + * │  ├── f1 + * │  └── f2 + * └── s3d1 + * └── s3d2 + * └── s3d3 + */ + +static void mkdir_parents(struct __test_metadata *const _metadata, + const char *const path) +{ + char *walker; + const char *parent; + int i, err; + + ASSERT_NE(path[0], '\0'); + walker = strdup(path); + ASSERT_NE(NULL, walker); + parent = walker; + for (i = 1; walker[i]; i++) { + if (walker[i] != '/') + continue; + walker[i] = '\0'; + err = mkdir(parent, 0700); + ASSERT_FALSE(err && errno != EEXIST) { + TH_LOG("Failed to create directory \"%s\": %s", + parent, strerror(errno)); + } + walker[i] = '/'; + } + free(walker); +} + +static void create_directory(struct __test_metadata *const _metadata, + const char *const path) +{ + mkdir_parents(_metadata, path); + ASSERT_EQ(0, mkdir(path, 0700)) { + TH_LOG("Failed to create directory \"%s\": %s", path, + strerror(errno)); + } +} + +static void create_file(struct __test_metadata *const _metadata, + const char *const path) +{ + mkdir_parents(_metadata, path); + ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) { + TH_LOG("Failed to create file \"%s\": %s", path, + strerror(errno)); + } +} + +static int remove_path(const char *const path) +{ + char *walker; + int i, ret, err = 0; + + walker = strdup(path); + if (!walker) { + err = ENOMEM; + goto out; + } + if (unlink(path) && rmdir(path)) { + if (errno != ENOENT) + err = errno; + goto out; + } + for (i = strlen(walker); i > 0; i--) { + if (walker[i] != '/') + continue; + walker[i] = '\0'; + ret = rmdir(walker); + if (ret) { + if (errno != ENOTEMPTY && errno != EBUSY) + err = errno; + goto out; + } + if (strcmp(walker, TMP_DIR) == 0) + goto out; + } + +out: + free(walker); + return err; +} + +static void prepare_layout(struct __test_metadata *const _metadata) +{ + disable_caps(_metadata); + umask(0077); + create_directory(_metadata, TMP_DIR); + + /* + * Do not pollute the rest of the system: creates a private mount point + * for tests relying on pivot_root(2) and move_mount(2). + */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, unshare(CLONE_NEWNS)); + ASSERT_EQ(0, mount("tmp", TMP_DIR, "tmpfs", 0, "size=4m,mode=700")); + ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL)); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +static void cleanup_layout(struct __test_metadata *const _metadata) +{ + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(TMP_DIR)); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(TMP_DIR)); +} + +static void create_layout1(struct __test_metadata *const _metadata) +{ + create_file(_metadata, file1_s1d1); + create_file(_metadata, file1_s1d2); + create_file(_metadata, file1_s1d3); + create_file(_metadata, file2_s1d1); + create_file(_metadata, file2_s1d2); + create_file(_metadata, file2_s1d3); + + create_file(_metadata, file1_s2d1); + create_file(_metadata, file1_s2d2); + create_file(_metadata, file1_s2d3); + create_file(_metadata, file2_s2d3); + + create_directory(_metadata, dir_s3d2); + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700")); + clear_cap(_metadata, CAP_SYS_ADMIN); + + ASSERT_EQ(0, mkdir(dir_s3d3, 0700)); +} + +static void remove_layout1(struct __test_metadata *const _metadata) +{ + EXPECT_EQ(0, remove_path(file2_s1d3)); + EXPECT_EQ(0, remove_path(file2_s1d2)); + EXPECT_EQ(0, remove_path(file2_s1d1)); + EXPECT_EQ(0, remove_path(file1_s1d3)); + EXPECT_EQ(0, remove_path(file1_s1d2)); + EXPECT_EQ(0, remove_path(file1_s1d1)); + + EXPECT_EQ(0, remove_path(file2_s2d3)); + EXPECT_EQ(0, remove_path(file1_s2d3)); + EXPECT_EQ(0, remove_path(file1_s2d2)); + EXPECT_EQ(0, remove_path(file1_s2d1)); + + EXPECT_EQ(0, remove_path(dir_s3d3)); + set_cap(_metadata, CAP_SYS_ADMIN); + umount(dir_s3d2); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(dir_s3d2)); +} + +FIXTURE(layout1) { +}; + +FIXTURE_SETUP(layout1) +{ + prepare_layout(_metadata); + + create_layout1(_metadata); +} + +FIXTURE_TEARDOWN(layout1) +{ + remove_layout1(_metadata); + + cleanup_layout(_metadata); +} + +/* + * This helper enables to use the ASSERT_* macros and print the line number + * pointing to the test caller. + */ +static int test_open_rel(const int dirfd, const char *const path, const int flags) +{ + int fd; + + /* Works with file and directories. */ + fd = openat(dirfd, path, flags | O_CLOEXEC); + if (fd < 0) + return errno; + /* + * Mixing error codes from close(2) and open(2) should not lead to any + * (access type) confusion for this test. + */ + if (close(fd) != 0) + return errno; + return 0; +} + +static int test_open(const char *const path, const int flags) +{ + return test_open_rel(AT_FDCWD, path, flags); +} + +TEST_F_FORK(layout1, no_restriction) +{ + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file2_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file2_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + + ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s2d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d3, O_RDONLY)); + + ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY)); +} + +TEST_F_FORK(layout1, inval) +{ + struct landlock_path_beneath_attr path_beneath = { + .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + .parent_fd = -1, + }; + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }; + int ruleset_fd; + + path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY | + O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + + ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */ + ASSERT_EQ(EBADF, errno); + ASSERT_EQ(0, close(ruleset_fd)); + + ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + /* Returns EBADFD because ruleset_fd is not a valid ruleset. */ + ASSERT_EQ(EBADFD, errno); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Gets a real ruleset. */ + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(0, close(path_beneath.parent_fd)); + + /* Tests without O_PATH. */ + path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(0, close(path_beneath.parent_fd)); + + /* Tests with a ruleset FD. */ + path_beneath.parent_fd = ruleset_fd; + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(EBADFD, errno); + + /* Checks unhandled allowed_access. */ + path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY | + O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + + /* Test with legitimate values. */ + path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE; + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(EINVAL, errno); + path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE; + + /* Test with unknown (64-bits) value. */ + path_beneath.allowed_access |= (1ULL << 60); + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(EINVAL, errno); + path_beneath.allowed_access &= ~(1ULL << 60); + + /* Test with no access. */ + path_beneath.allowed_access = 0; + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(ENOMSG, errno); + path_beneath.allowed_access &= ~(1ULL << 60); + + ASSERT_EQ(0, close(path_beneath.parent_fd)); + + /* Enforces the ruleset. */ + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + + ASSERT_EQ(0, close(ruleset_fd)); +} + +#define ACCESS_FILE ( \ + LANDLOCK_ACCESS_FS_EXECUTE | \ + LANDLOCK_ACCESS_FS_WRITE_FILE | \ + LANDLOCK_ACCESS_FS_READ_FILE) + +#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM + +#define ACCESS_ALL ( \ + ACCESS_FILE | \ + LANDLOCK_ACCESS_FS_READ_DIR | \ + LANDLOCK_ACCESS_FS_REMOVE_DIR | \ + LANDLOCK_ACCESS_FS_REMOVE_FILE | \ + LANDLOCK_ACCESS_FS_MAKE_CHAR | \ + LANDLOCK_ACCESS_FS_MAKE_DIR | \ + LANDLOCK_ACCESS_FS_MAKE_REG | \ + LANDLOCK_ACCESS_FS_MAKE_SOCK | \ + LANDLOCK_ACCESS_FS_MAKE_FIFO | \ + LANDLOCK_ACCESS_FS_MAKE_BLOCK | \ + ACCESS_LAST) + +TEST_F_FORK(layout1, file_access_rights) +{ + __u64 access; + int err; + struct landlock_path_beneath_attr path_beneath = {}; + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = ACCESS_ALL, + }; + const int ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + + ASSERT_LE(0, ruleset_fd); + + /* Tests access rights for files. */ + path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + for (access = 1; access <= ACCESS_LAST; access <<= 1) { + path_beneath.allowed_access = access; + err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0); + if ((access | ACCESS_FILE) == ACCESS_FILE) { + ASSERT_EQ(0, err); + } else { + ASSERT_EQ(-1, err); + ASSERT_EQ(EINVAL, errno); + } + } + ASSERT_EQ(0, close(path_beneath.parent_fd)); +} + +static void add_path_beneath(struct __test_metadata *const _metadata, + const int ruleset_fd, const __u64 allowed_access, + const char *const path) +{ + struct landlock_path_beneath_attr path_beneath = { + .allowed_access = allowed_access, + }; + + path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd) { + TH_LOG("Failed to open directory \"%s\": %s", path, + strerror(errno)); + } + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)) { + TH_LOG("Failed to update the ruleset with \"%s\": %s", path, + strerror(errno)); + } + ASSERT_EQ(0, close(path_beneath.parent_fd)); +} + +struct rule { + const char *path; + __u64 access; +}; + +#define ACCESS_RO ( \ + LANDLOCK_ACCESS_FS_READ_FILE | \ + LANDLOCK_ACCESS_FS_READ_DIR) + +#define ACCESS_RW ( \ + ACCESS_RO | \ + LANDLOCK_ACCESS_FS_WRITE_FILE) + +static int create_ruleset(struct __test_metadata *const _metadata, + const __u64 handled_access_fs, const struct rule rules[]) +{ + int ruleset_fd, i; + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = handled_access_fs, + }; + + ASSERT_NE(NULL, rules) { + TH_LOG("No rule list"); + } + ASSERT_NE(NULL, rules[0].path) { + TH_LOG("Empty rule list"); + } + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd) { + TH_LOG("Failed to create a ruleset: %s", strerror(errno)); + } + + for (i = 0; rules[i].path; i++) { + add_path_beneath(_metadata, ruleset_fd, rules[i].access, + rules[i].path); + } + return ruleset_fd; +} + +static void enforce_ruleset(struct __test_metadata *const _metadata, + const int ruleset_fd) +{ + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) { + TH_LOG("Failed to enforce ruleset: %s", strerror(errno)); + } +} + +TEST_F_FORK(layout1, proc_nsfs) +{ + const struct rule rules[] = { + { + .path = "/dev/null", + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + struct landlock_path_beneath_attr path_beneath; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access | + LANDLOCK_ACCESS_FS_READ_DIR, rules); + + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY)); + + enforce_ruleset(_metadata, ruleset_fd); + + ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); + ASSERT_EQ(EACCES, test_open("/dev", O_RDONLY)); + ASSERT_EQ(0, test_open("/dev/null", O_RDONLY)); + ASSERT_EQ(EACCES, test_open("/dev/full", O_RDONLY)); + + ASSERT_EQ(EACCES, test_open("/proc", O_RDONLY)); + ASSERT_EQ(EACCES, test_open("/proc/self", O_RDONLY)); + ASSERT_EQ(EACCES, test_open("/proc/self/ns", O_RDONLY)); + /* + * Because nsfs is an internal filesystem, /proc/self/ns/mnt is a + * disconnected path. Such path cannot be identified and must then be + * allowed. + */ + ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY)); + + /* + * Checks that it is not possible to add nsfs-like filesystem + * references to a ruleset. + */ + path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + ASSERT_EQ(EBADFD, errno); + ASSERT_EQ(0, close(path_beneath.parent_fd)); +} + +TEST_F_FORK(layout1, unpriv) { + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = ACCESS_RO, + }, + {} + }; + int ruleset_fd; + + drop_caps(_metadata); + + ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0)); + ASSERT_EQ(EPERM, errno); + + /* enforce_ruleset() calls prctl(no_new_privs). */ + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, effective_access) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = ACCESS_RO, + }, + { + .path = file1_s2d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + char buf; + int reg_fd; + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Tests on a directory. */ + ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + + /* Tests on a file. */ + ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY)); + + /* Checks effective read and write actions. */ + reg_fd = open(file1_s2d2, O_RDWR | O_CLOEXEC); + ASSERT_LE(0, reg_fd); + ASSERT_EQ(1, write(reg_fd, ".", 1)); + ASSERT_LE(0, lseek(reg_fd, 0, SEEK_SET)); + ASSERT_EQ(1, read(reg_fd, &buf, 1)); + ASSERT_EQ('.', buf); + ASSERT_EQ(0, close(reg_fd)); + + /* Just in case, double-checks effective actions. */ + reg_fd = open(file1_s2d2, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, reg_fd); + ASSERT_EQ(-1, write(reg_fd, &buf, 1)); + ASSERT_EQ(EBADF, errno); + ASSERT_EQ(0, close(reg_fd)); +} + +TEST_F_FORK(layout1, unhandled_access) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = ACCESS_RO, + }, + {} + }; + /* Here, we only handle read accesses, not write accesses. */ + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Because the policy does not handle LANDLOCK_ACCESS_FS_WRITE_FILE, + * opening for write-only should be allowed, but not read-write. + */ + ASSERT_EQ(0, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + + ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); +} + +TEST_F_FORK(layout1, ruleset_overlap) +{ + const struct rule rules[] = { + /* These rules should be ORed among them. */ + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks s1d1 hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d2 hierarchy. */ + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* Checks s1d3 hierarchy. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); +} + +TEST_F_FORK(layout1, non_overlapping_accesses) +{ + const struct rule layer1[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + {} + }; + const struct rule layer2[] = { + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {} + }; + int ruleset_fd; + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, + layer1); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0)); + ASSERT_EQ(0, unlink(file1_s1d2)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE, + layer2); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Unchanged accesses for file creation. */ + ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0)); + + /* Checks file removing. */ + ASSERT_EQ(-1, unlink(file1_s1d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, unlink(file1_s1d3)); +} + +TEST_F_FORK(layout1, interleaved_masked_accesses) +{ + /* + * Checks overly restrictive rules: + * layer 1: allows R s1d1/s1d2/s1d3/file1 + * layer 2: allows RW s1d1/s1d2/s1d3 + * allows W s1d1/s1d2 + * denies R s1d1/s1d2 + * layer 3: allows R s1d1 + * layer 4: allows R s1d1/s1d2 + * denies W s1d1/s1d2 + * layer 5: allows R s1d1/s1d2 + * layer 6: allows X ---- + * layer 7: allows W s1d1/s1d2 + * denies R s1d1/s1d2 + */ + const struct rule layer1_read[] = { + /* Allows read access to file1_s1d3 with the first layer. */ + { + .path = file1_s1d3, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {} + }; + /* First rule with write restrictions. */ + const struct rule layer2_read_write[] = { + /* Start by granting read-write access via its parent directory... */ + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + /* ...but also denies read access via its grandparent directory. */ + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + const struct rule layer3_read[] = { + /* Allows read access via its great-grandparent directory. */ + { + .path = dir_s1d1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {} + }; + const struct rule layer4_read_write[] = { + /* + * Try to confuse the deny access by denying write (but not + * read) access via its grandparent directory. + */ + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {} + }; + const struct rule layer5_read[] = { + /* + * Try to override layer2's deny read access by explicitly + * allowing read access via file1_s1d3's grandparent. + */ + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {} + }; + const struct rule layer6_execute[] = { + /* + * Restricts an unrelated file hierarchy with a new access + * (non-overlapping) type. + */ + { + .path = dir_s2d1, + .access = LANDLOCK_ACCESS_FS_EXECUTE, + }, + {} + }; + const struct rule layer7_read_write[] = { + /* + * Finally, denies read access to file1_s1d3 via its + * grandparent. + */ + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + int ruleset_fd; + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, + layer1_read); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that read access is granted for file1_s1d3 with layer 1. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that previous access rights are unchanged with layer 2. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, + layer3_read); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that previous access rights are unchanged with layer 3. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); + + /* This time, denies write access for the file hierarchy. */ + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Checks that the only change with layer 4 is that write access is + * denied. + */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, + layer5_read); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that previous access rights are unchanged with layer 5. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE, + layer6_execute); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that previous access rights are unchanged with layer 6. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks read access is now denied with layer 7. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); +} + +TEST_F_FORK(layout1, inherit_subset) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* Write access is forbidden. */ + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + /* Readdir access is allowed. */ + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* Write access is forbidden. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + /* Readdir access is allowed. */ + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* + * Tests shared rule extension: the following rules should not grant + * any new access, only remove some. Once enforced, these rules are + * ANDed with the previous ones. + */ + add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE, + dir_s1d2); + /* + * According to ruleset_fd, dir_s1d2 should now have the + * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE + * access rights (even if this directory is opened a second time). + * However, when enforcing this updated ruleset, the ruleset tied to + * the current process (i.e. its domain) will still only have the + * dir_s1d2 with LANDLOCK_ACCESS_FS_READ_FILE and + * LANDLOCK_ACCESS_FS_READ_DIR accesses, but + * LANDLOCK_ACCESS_FS_WRITE_FILE must not be allowed because it would + * be a privilege escalation. + */ + enforce_ruleset(_metadata, ruleset_fd); + + /* Same tests and results as above. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* It is still forbidden to write in file1_s1d2. */ + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + /* Readdir access is still allowed. */ + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* It is still forbidden to write in file1_s1d3. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + /* Readdir access is still allowed. */ + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* + * Try to get more privileges by adding new access rights to the parent + * directory: dir_s1d1. + */ + add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1); + enforce_ruleset(_metadata, ruleset_fd); + + /* Same tests and results as above. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* It is still forbidden to write in file1_s1d2. */ + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + /* Readdir access is still allowed. */ + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* It is still forbidden to write in file1_s1d3. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + /* Readdir access is still allowed. */ + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* + * Now, dir_s1d3 get a new rule tied to it, only allowing + * LANDLOCK_ACCESS_FS_WRITE_FILE. The (kernel internal) difference is + * that there was no rule tied to it before. + */ + add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE, + dir_s1d3); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Same tests and results as above, except for open(dir_s1d3) which is + * now denied because the new rule mask the rule previously inherited + * from dir_s1d2. + */ + + /* Same tests and results as above. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + /* It is still forbidden to write in file1_s1d2. */ + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + /* Readdir access is still allowed. */ + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* It is still forbidden to write in file1_s1d3. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + /* + * Readdir of dir_s1d3 is still allowed because of the OR policy inside + * the same layer. + */ + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); +} + +TEST_F_FORK(layout1, inherit_superset) +{ + const struct rule rules[] = { + { + .path = dir_s1d3, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + + /* Readdir access is denied for dir_s1d2. */ + ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + /* Readdir access is allowed for dir_s1d3. */ + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); + /* File access is allowed for file1_s1d3. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + + /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */ + add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Readdir access is still denied for dir_s1d2. */ + ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + /* Readdir access is still allowed for dir_s1d3. */ + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); + /* File access is still allowed for file1_s1d3. */ + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); +} + +TEST_F_FORK(layout1, max_layers) +{ + int i, err; + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + for (i = 0; i < 64; i++) + enforce_ruleset(_metadata, ruleset_fd); + + for (i = 0; i < 2; i++) { + err = landlock_restrict_self(ruleset_fd, 0); + ASSERT_EQ(-1, err); + ASSERT_EQ(E2BIG, errno); + } + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, empty_or_same_ruleset) +{ + struct landlock_ruleset_attr ruleset_attr = {}; + int ruleset_fd; + + /* Tests empty handled_access_fs. */ + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(-1, ruleset_fd); + ASSERT_EQ(ENOMSG, errno); + + /* Enforces policy which deny read access to all files. */ + ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE; + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + + /* Nests a policy which deny read access to all directories. */ + ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR; + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); + + /* Enforces a second time with the same ruleset. */ + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, rule_on_mountpoint) +{ + const struct rule rules[] = { + { + .path = dir_s1d1, + .access = ACCESS_RO, + }, + { + /* dir_s3d2 is a mount point. */ + .path = dir_s3d2, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + + ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY)); + + ASSERT_EQ(EACCES, test_open(dir_s3d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY)); +} + +TEST_F_FORK(layout1, rule_over_mountpoint) +{ + const struct rule rules[] = { + { + .path = dir_s1d1, + .access = ACCESS_RO, + }, + { + /* dir_s3d2 is a mount point. */ + .path = dir_s3d1, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + + ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY)); + + ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY)); +} + +/* + * This test verifies that we can apply a landlock rule on the root directory + * (which might require special handling). + */ +TEST_F_FORK(layout1, rule_over_root_allow_then_deny) +{ + struct rule rules[] = { + { + .path = "/", + .access = ACCESS_RO, + }, + {} + }; + int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks allowed access. */ + ASSERT_EQ(0, test_open("/", O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + + rules[0].access = LANDLOCK_ACCESS_FS_READ_FILE; + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks denied access (on a directory). */ + ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); +} + +TEST_F_FORK(layout1, rule_over_root_deny) +{ + const struct rule rules[] = { + { + .path = "/", + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks denied access (on a directory). */ + ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); +} + +TEST_F_FORK(layout1, rule_inside_mount_ns) +{ + const struct rule rules[] = { + { + .path = "s3d3", + .access = ACCESS_RO, + }, + {} + }; + int ruleset_fd; + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) { + TH_LOG("Failed to pivot root: %s", strerror(errno)); + }; + ASSERT_EQ(0, chdir("/")); + clear_cap(_metadata, CAP_SYS_ADMIN); + + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, test_open("s3d3", O_RDONLY)); + ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); +} + +TEST_F_FORK(layout1, mount_and_pivot) +{ + const struct rule rules[] = { + { + .path = dir_s3d2, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)); + ASSERT_EQ(EPERM, errno); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +TEST_F_FORK(layout1, move_mount) +{ + const struct rule rules[] = { + { + .path = dir_s3d2, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, + dir_s1d2, 0)) { + TH_LOG("Failed to move mount: %s", strerror(errno)); + } + + ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD, + dir_s3d2, 0)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, + dir_s1d2, 0)); + ASSERT_EQ(EPERM, errno); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +TEST_F_FORK(layout1, release_inodes) +{ + const struct rule rules[] = { + { + .path = dir_s1d1, + .access = ACCESS_RO, + }, + { + .path = dir_s3d2, + .access = ACCESS_RO, + }, + { + .path = dir_s3d3, + .access = ACCESS_RO, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); + + ASSERT_LE(0, ruleset_fd); + /* Unmount a file hierarchy while it is being used by a ruleset. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, umount(dir_s3d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY)); + /* This dir_s3d3 would not be allowed and does not exist anyway. */ + ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY)); +} + +enum relative_access { + REL_OPEN, + REL_CHDIR, + REL_CHROOT_ONLY, + REL_CHROOT_CHDIR, +}; + +static void test_relative_path(struct __test_metadata *const _metadata, + const enum relative_access rel) +{ + /* + * Common layer to check that chroot doesn't ignore it (i.e. a chroot + * is not a disconnected root directory). + */ + const struct rule layer1_base[] = { + { + .path = TMP_DIR, + .access = ACCESS_RO, + }, + {} + }; + const struct rule layer2_subs[] = { + { + .path = dir_s1d2, + .access = ACCESS_RO, + }, + { + .path = dir_s2d2, + .access = ACCESS_RO, + }, + {} + }; + int dirfd, ruleset_fd; + + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_subs); + + ASSERT_LE(0, ruleset_fd); + switch (rel) { + case REL_OPEN: + case REL_CHDIR: + break; + case REL_CHROOT_ONLY: + ASSERT_EQ(0, chdir(dir_s2d2)); + break; + case REL_CHROOT_CHDIR: + ASSERT_EQ(0, chdir(dir_s1d2)); + break; + default: + ASSERT_TRUE(false); + return; + } + + set_cap(_metadata, CAP_SYS_CHROOT); + enforce_ruleset(_metadata, ruleset_fd); + + switch (rel) { + case REL_OPEN: + dirfd = open(dir_s1d2, O_DIRECTORY); + ASSERT_LE(0, dirfd); + break; + case REL_CHDIR: + ASSERT_EQ(0, chdir(dir_s1d2)); + dirfd = AT_FDCWD; + break; + case REL_CHROOT_ONLY: + /* Do chroot into dir_s1d2 (relative to dir_s2d2). */ + ASSERT_EQ(0, chroot("../../s1d1/s1d2")) { + TH_LOG("Failed to chroot: %s", strerror(errno)); + } + dirfd = AT_FDCWD; + break; + case REL_CHROOT_CHDIR: + /* Do chroot into dir_s1d2. */ + ASSERT_EQ(0, chroot(".")) { + TH_LOG("Failed to chroot: %s", strerror(errno)); + } + dirfd = AT_FDCWD; + break; + } + + ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES, + test_open_rel(dirfd, "..", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY)); + + if (rel == REL_CHROOT_ONLY) { + /* The current directory is dir_s2d2. */ + ASSERT_EQ(0, test_open_rel(dirfd, "./s2d3", O_RDONLY)); + } else { + /* The current directory is dir_s1d2. */ + ASSERT_EQ(0, test_open_rel(dirfd, "./s1d3", O_RDONLY)); + } + + if (rel == REL_CHROOT_ONLY || rel == REL_CHROOT_CHDIR) { + /* Checks the root dir_s1d2. */ + ASSERT_EQ(0, test_open_rel(dirfd, "/..", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "/", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "/f1", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "/s1d3", O_RDONLY)); + } + + if (rel != REL_CHROOT_CHDIR) { + ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY)); + + ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY)); + ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY)); + } + + if (rel == REL_OPEN) + ASSERT_EQ(0, close(dirfd)); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, relative_open) +{ + test_relative_path(_metadata, REL_OPEN); +} + +TEST_F_FORK(layout1, relative_chdir) +{ + test_relative_path(_metadata, REL_CHDIR); +} + +TEST_F_FORK(layout1, relative_chroot_only) +{ + test_relative_path(_metadata, REL_CHROOT_ONLY); +} + +TEST_F_FORK(layout1, relative_chroot_chdir) +{ + test_relative_path(_metadata, REL_CHROOT_CHDIR); +} + +static void copy_binary(struct __test_metadata *const _metadata, + const char *const dst_path) +{ + int dst_fd, src_fd; + struct stat statbuf; + + dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC); + ASSERT_LE(0, dst_fd) { + TH_LOG("Failed to open \"%s\": %s", dst_path, + strerror(errno)); + } + src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, src_fd) { + TH_LOG("Failed to open \"" BINARY_PATH "\": %s", + strerror(errno)); + } + ASSERT_EQ(0, fstat(src_fd, &statbuf)); + ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0, + statbuf.st_size)); + ASSERT_EQ(0, close(src_fd)); + ASSERT_EQ(0, close(dst_fd)); +} + +static void test_execute(struct __test_metadata *const _metadata, + const int err, const char *const path) +{ + int status; + char *const argv[] = {(char *)path, NULL}; + const pid_t child = fork(); + + ASSERT_LE(0, child); + if (child == 0) { + ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) { + TH_LOG("Failed to execute \"%s\": %s", path, + strerror(errno)); + }; + ASSERT_EQ(err, errno); + _exit(_metadata->passed ? 2 : 1); + return; + } + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFEXITED(status)); + ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) { + TH_LOG("Unexpected return code for \"%s\": %s", path, + strerror(errno)); + }; +} + +TEST_F_FORK(layout1, execute) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_EXECUTE, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + copy_binary(_metadata, file1_s1d1); + copy_binary(_metadata, file1_s1d2); + copy_binary(_metadata, file1_s1d3); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + test_execute(_metadata, EACCES, file1_s1d1); + + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + test_execute(_metadata, 0, file1_s1d2); + + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + test_execute(_metadata, 0, file1_s1d3); +} + +TEST_F_FORK(layout1, link) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_REG, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file1_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + /* Denies linking because of reparenting. */ + ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + + ASSERT_EQ(0, link(file2_s1d2, file1_s1d2)); + ASSERT_EQ(0, link(file2_s1d3, file1_s1d3)); +} + +TEST_F_FORK(layout1, rename_file) +{ + const struct rule rules[] = { + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + { + .path = dir_s2d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Tries to replace a file, from a directory that allows file removal, + * but to a different directory (which also allows file removal). + */ + ASSERT_EQ(-1, rename(file1_s2d3, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + + /* + * Tries to replace a file, from a directory that denies file removal, + * to a different directory (which allows file removal). + */ + ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file1_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + + /* Exchanges files and directories that partially allow removal. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* Renames files with different parents. */ + ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3)); + ASSERT_EQ(EXDEV, errno); + + /* Exchanges and renames files with same parent. */ + ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3)); + + /* Exchanges files and directories with same parent, twice. */ + ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); + ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3, + RENAME_EXCHANGE)); +} + +TEST_F_FORK(layout1, rename_dir) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, + }, + { + .path = dir_s2d1, + .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + /* Empties dir_s1d3 to allow renaming. */ + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Exchanges and renames directory to a different parent. */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, rename(dir_s2d3, dir_s1d3)); + ASSERT_EQ(EXDEV, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3, + RENAME_EXCHANGE)); + ASSERT_EQ(EXDEV, errno); + + /* + * Exchanges directory to the same parent, which doesn't allow + * directory removal. + */ + ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2, + RENAME_EXCHANGE)); + ASSERT_EQ(EACCES, errno); + + /* + * Exchanges and renames directory to the same parent, which allows + * directory removal. + */ + ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2, + RENAME_EXCHANGE)); + ASSERT_EQ(0, unlink(dir_s1d3)); + ASSERT_EQ(0, mkdir(dir_s1d3, 0700)); + ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3)); + ASSERT_EQ(0, rmdir(dir_s1d3)); +} + +TEST_F_FORK(layout1, remove_dir) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(0, rmdir(dir_s1d3)); + ASSERT_EQ(0, mkdir(dir_s1d3, 0700)); + ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR)); + + /* dir_s1d2 itself cannot be removed. */ + ASSERT_EQ(-1, rmdir(dir_s1d2)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rmdir(dir_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR)); + ASSERT_EQ(EACCES, errno); +} + +TEST_F_FORK(layout1, remove_file) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, unlink(file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0)); +} + +static void test_make_file(struct __test_metadata *const _metadata, + const __u64 access, const mode_t mode, const dev_t dev) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = access, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, access, rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file2_s1d1)); + ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) { + TH_LOG("Failed to make file \"%s\": %s", + file2_s1d1, strerror(errno)); + }; + + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d2)); + + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + + ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) { + TH_LOG("Failed to make file \"%s\": %s", + file1_s1d2, strerror(errno)); + }; + ASSERT_EQ(0, link(file1_s1d2, file2_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d2)); + ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2)); + + ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev)); + ASSERT_EQ(0, link(file1_s1d3, file2_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3)); +} + +TEST_F_FORK(layout1, make_char) +{ + /* Creates a /dev/null device. */ + set_cap(_metadata, CAP_MKNOD); + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR, + makedev(1, 3)); +} + +TEST_F_FORK(layout1, make_block) +{ + /* Creates a /dev/loop0 device. */ + set_cap(_metadata, CAP_MKNOD); + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK, + makedev(7, 0)); +} + +TEST_F_FORK(layout1, make_reg_1) +{ + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, S_IFREG, 0); +} + +TEST_F_FORK(layout1, make_reg_2) +{ + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, 0, 0); +} + +TEST_F_FORK(layout1, make_sock) +{ + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_SOCK, S_IFSOCK, 0); +} + +TEST_F_FORK(layout1, make_fifo) +{ + test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_FIFO, S_IFIFO, 0); +} + +TEST_F_FORK(layout1, make_sym) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_SYM, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file2_s1d1)); + ASSERT_EQ(0, symlink("none", file2_s1d1)); + + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d2)); + + ASSERT_EQ(0, unlink(file1_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(-1, symlink("none", file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1)); + ASSERT_EQ(EACCES, errno); + + ASSERT_EQ(0, symlink("none", file1_s1d2)); + ASSERT_EQ(0, link(file1_s1d2, file2_s1d2)); + ASSERT_EQ(0, unlink(file2_s1d2)); + ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2)); + + ASSERT_EQ(0, symlink("none", file1_s1d3)); + ASSERT_EQ(0, link(file1_s1d3, file2_s1d3)); + ASSERT_EQ(0, unlink(file2_s1d3)); + ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3)); +} + +TEST_F_FORK(layout1, make_dir) +{ + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_MAKE_DIR, + }, + {} + }; + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, unlink(file1_s1d1)); + ASSERT_EQ(0, unlink(file1_s1d2)); + ASSERT_EQ(0, unlink(file1_s1d3)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Uses file_* as directory names. */ + ASSERT_EQ(-1, mkdir(file1_s1d1, 0700)); + ASSERT_EQ(EACCES, errno); + ASSERT_EQ(0, mkdir(file1_s1d2, 0700)); + ASSERT_EQ(0, mkdir(file1_s1d3, 0700)); +} + +static int open_proc_fd(struct __test_metadata *const _metadata, const int fd, + const int open_flags) +{ + static const char path_template[] = "/proc/self/fd/%d"; + char procfd_path[sizeof(path_template) + 10]; + const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path), + path_template, fd); + + ASSERT_LT(procfd_path_size, sizeof(procfd_path)); + return open(procfd_path, open_flags); +} + +TEST_F_FORK(layout1, proc_unlinked_file) +{ + const struct rule rules[] = { + { + .path = file1_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {} + }; + int reg_fd, proc_fd; + const int ruleset_fd = create_ruleset(_metadata, + LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + reg_fd = open(file1_s1d2, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, reg_fd); + ASSERT_EQ(0, unlink(file1_s1d2)); + + proc_fd = open_proc_fd(_metadata, reg_fd, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, proc_fd); + ASSERT_EQ(0, close(proc_fd)); + + proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC); + ASSERT_EQ(-1, proc_fd) { + TH_LOG("Successfully opened /proc/self/fd/%d: %s", + reg_fd, strerror(errno)); + } + ASSERT_EQ(EACCES, errno); + + ASSERT_EQ(0, close(reg_fd)); +} + +TEST_F_FORK(layout1, proc_pipe) +{ + int proc_fd; + int pipe_fds[2]; + char buf = '\0'; + const struct rule rules[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + /* Limits read and write access to files tied to the filesystem. */ + const int ruleset_fd = create_ruleset(_metadata, rules[0].access, + rules); + + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks enforcement for normal files. */ + ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); + + /* Checks access to pipes through FD. */ + ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC)); + ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) { + TH_LOG("Failed to write in pipe: %s", strerror(errno)); + } + ASSERT_EQ(1, read(pipe_fds[0], &buf, 1)); + ASSERT_EQ('.', buf); + + /* Checks write access to pipe through /proc/self/fd . */ + proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC); + ASSERT_LE(0, proc_fd); + ASSERT_EQ(1, write(proc_fd, ".", 1)) { + TH_LOG("Failed to write through /proc/self/fd/%d: %s", + pipe_fds[1], strerror(errno)); + } + ASSERT_EQ(0, close(proc_fd)); + + /* Checks read access to pipe through /proc/self/fd . */ + proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, proc_fd); + buf = '\0'; + ASSERT_EQ(1, read(proc_fd, &buf, 1)) { + TH_LOG("Failed to read through /proc/self/fd/%d: %s", + pipe_fds[1], strerror(errno)); + } + ASSERT_EQ(0, close(proc_fd)); + + ASSERT_EQ(0, close(pipe_fds[0])); + ASSERT_EQ(0, close(pipe_fds[1])); +} + +FIXTURE(layout1_bind) { +}; + +FIXTURE_SETUP(layout1_bind) +{ + prepare_layout(_metadata); + + create_layout1(_metadata); + + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount(dir_s1d2, dir_s2d2, NULL, MS_BIND, NULL)); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +FIXTURE_TEARDOWN(layout1_bind) +{ + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(dir_s2d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + remove_layout1(_metadata); + + cleanup_layout(_metadata); +} + +static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3"; +static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1"; + +/* + * layout1_bind hierarchy: + * + * tmp + * ├── s1d1 + * │  ├── f1 + * │  ├── f2 + * │  └── s1d2 + * │  ├── f1 + * │  ├── f2 + * │  └── s1d3 + * │  ├── f1 + * │  └── f2 + * ├── s2d1 + * │  ├── f1 + * │  └── s2d2 + * │  ├── f1 + * │  ├── f2 + * │  └── s1d3 + * │  ├── f1 + * │  └── f2 + * └── s3d1 + * └── s3d2 + * └── s3d3 + */ + +TEST_F_FORK(layout1_bind, no_restriction) +{ + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + + ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY)); + ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY)); + ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY)); + ASSERT_EQ(ENOENT, test_open(dir_s2d3, O_RDONLY)); + ASSERT_EQ(ENOENT, test_open(file1_s2d3, O_RDONLY)); + + ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY)); + ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY)); + + ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY)); +} + +TEST_F_FORK(layout1_bind, same_content_same_file) +{ + /* + * Sets access right on parent directories of both source and + * destination mount points. + */ + const struct rule layer1_parent[] = { + { + .path = dir_s1d1, + .access = ACCESS_RO, + }, + { + .path = dir_s2d1, + .access = ACCESS_RW, + }, + {} + }; + /* + * Sets access rights on the same bind-mounted directories. The result + * should be ACCESS_RW for both directories, but not both hierarchies + * because of the first layer. + */ + const struct rule layer2_mount_point[] = { + { + .path = dir_s1d2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = dir_s2d2, + .access = ACCESS_RW, + }, + {} + }; + /* Only allow read-access to the s1d3 hierarchies. */ + const struct rule layer3_source[] = { + { + .path = dir_s1d3, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {} + }; + /* Removes all access rights. */ + const struct rule layer4_destination[] = { + { + .path = bind_file1_s1d3, + .access = LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + int ruleset_fd; + + /* Sets rules for the parent directories. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_parent); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks source hierarchy. */ + ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* Checks destination hierarchy. */ + ASSERT_EQ(0, test_open(file1_s2d1, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); + + /* Sets rules for the mount points. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_mount_point); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks source hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + /* Checks destination hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s2d1, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s2d1, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR)); + ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); + ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* Sets a (shared) rule only on the source. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_source); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks source hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* Checks destination hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s2d2, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s2d2, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); + + ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY)); + ASSERT_EQ(EACCES, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY)); + + /* Sets a (shared) rule only on the destination. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_destination); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks source hierarchy. */ + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); + + /* Checks destination hierarchy. */ + ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY)); +} + +#define LOWER_BASE TMP_DIR "/lower" +#define LOWER_DATA LOWER_BASE "/data" +static const char lower_fl1[] = LOWER_DATA "/fl1"; +static const char lower_dl1[] = LOWER_DATA "/dl1"; +static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2"; +static const char lower_fo1[] = LOWER_DATA "/fo1"; +static const char lower_do1[] = LOWER_DATA "/do1"; +static const char lower_do1_fo2[] = LOWER_DATA "/do1/fo2"; +static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3"; + +static const char (*lower_base_files[])[] = { + &lower_fl1, + &lower_fo1, + NULL +}; +static const char (*lower_base_directories[])[] = { + &lower_dl1, + &lower_do1, + NULL +}; +static const char (*lower_sub_files[])[] = { + &lower_dl1_fl2, + &lower_do1_fo2, + &lower_do1_fl3, + NULL +}; + +#define UPPER_BASE TMP_DIR "/upper" +#define UPPER_DATA UPPER_BASE "/data" +#define UPPER_WORK UPPER_BASE "/work" +static const char upper_fu1[] = UPPER_DATA "/fu1"; +static const char upper_du1[] = UPPER_DATA "/du1"; +static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2"; +static const char upper_fo1[] = UPPER_DATA "/fo1"; +static const char upper_do1[] = UPPER_DATA "/do1"; +static const char upper_do1_fo2[] = UPPER_DATA "/do1/fo2"; +static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3"; + +static const char (*upper_base_files[])[] = { + &upper_fu1, + &upper_fo1, + NULL +}; +static const char (*upper_base_directories[])[] = { + &upper_du1, + &upper_do1, + NULL +}; +static const char (*upper_sub_files[])[] = { + &upper_du1_fu2, + &upper_do1_fo2, + &upper_do1_fu3, + NULL +}; + +#define MERGE_BASE TMP_DIR "/merge" +#define MERGE_DATA MERGE_BASE "/data" +static const char merge_fl1[] = MERGE_DATA "/fl1"; +static const char merge_dl1[] = MERGE_DATA "/dl1"; +static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2"; +static const char merge_fu1[] = MERGE_DATA "/fu1"; +static const char merge_du1[] = MERGE_DATA "/du1"; +static const char merge_du1_fu2[] = MERGE_DATA "/du1/fu2"; +static const char merge_fo1[] = MERGE_DATA "/fo1"; +static const char merge_do1[] = MERGE_DATA "/do1"; +static const char merge_do1_fo2[] = MERGE_DATA "/do1/fo2"; +static const char merge_do1_fl3[] = MERGE_DATA "/do1/fl3"; +static const char merge_do1_fu3[] = MERGE_DATA "/do1/fu3"; + +static const char (*merge_base_files[])[] = { + &merge_fl1, + &merge_fu1, + &merge_fo1, + NULL +}; +static const char (*merge_base_directories[])[] = { + &merge_dl1, + &merge_du1, + &merge_do1, + NULL +}; +static const char (*merge_sub_files[])[] = { + &merge_dl1_fl2, + &merge_du1_fu2, + &merge_do1_fo2, + &merge_do1_fl3, + &merge_do1_fu3, + NULL +}; + +/* + * layout2_overlay hierarchy: + * + * tmp + * ├── lower + * │  └── data + * │  ├── dl1 + * │  │  └── fl2 + * │  ├── do1 + * │  │  ├── fl3 + * │  │  └── fo2 + * │  ├── fl1 + * │  └── fo1 + * ├── merge + * │  └── data + * │  ├── dl1 + * │  │  └── fl2 + * │  ├── do1 + * │  │  ├── fl3 + * │  │  ├── fo2 + * │  │  └── fu3 + * │  ├── du1 + * │  │  └── fu2 + * │  ├── fl1 + * │  ├── fo1 + * │  └── fu1 + * └── upper + * ├── data + * │  ├── do1 + * │  │  ├── fo2 + * │  │  └── fu3 + * │  ├── du1 + * │  │  └── fu2 + * │  ├── fo1 + * │  └── fu1 + * └── work + * └── work + */ + +FIXTURE(layout2_overlay) { +}; + +FIXTURE_SETUP(layout2_overlay) +{ + prepare_layout(_metadata); + + create_directory(_metadata, LOWER_BASE); + set_cap(_metadata, CAP_SYS_ADMIN); + /* Creates tmpfs mount points to get deterministic overlayfs. */ + ASSERT_EQ(0, mount("tmp", LOWER_BASE, "tmpfs", 0, "size=4m,mode=700")); + clear_cap(_metadata, CAP_SYS_ADMIN); + create_file(_metadata, lower_fl1); + create_file(_metadata, lower_dl1_fl2); + create_file(_metadata, lower_fo1); + create_file(_metadata, lower_do1_fo2); + create_file(_metadata, lower_do1_fl3); + + create_directory(_metadata, UPPER_BASE); + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount("tmp", UPPER_BASE, "tmpfs", 0, "size=4m,mode=700")); + clear_cap(_metadata, CAP_SYS_ADMIN); + create_file(_metadata, upper_fu1); + create_file(_metadata, upper_du1_fu2); + create_file(_metadata, upper_fo1); + create_file(_metadata, upper_do1_fo2); + create_file(_metadata, upper_do1_fu3); + ASSERT_EQ(0, mkdir(UPPER_WORK, 0700)); + + create_directory(_metadata, MERGE_DATA); + set_cap(_metadata, CAP_SYS_ADMIN); + set_cap(_metadata, CAP_DAC_OVERRIDE); + ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0, + "lowerdir=" LOWER_DATA + ",upperdir=" UPPER_DATA + ",workdir=" UPPER_WORK)); + clear_cap(_metadata, CAP_DAC_OVERRIDE); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +FIXTURE_TEARDOWN(layout2_overlay) +{ + EXPECT_EQ(0, remove_path(lower_do1_fl3)); + EXPECT_EQ(0, remove_path(lower_dl1_fl2)); + EXPECT_EQ(0, remove_path(lower_fl1)); + EXPECT_EQ(0, remove_path(lower_do1_fo2)); + EXPECT_EQ(0, remove_path(lower_fo1)); + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(LOWER_BASE)); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(LOWER_BASE)); + + EXPECT_EQ(0, remove_path(upper_do1_fu3)); + EXPECT_EQ(0, remove_path(upper_du1_fu2)); + EXPECT_EQ(0, remove_path(upper_fu1)); + EXPECT_EQ(0, remove_path(upper_do1_fo2)); + EXPECT_EQ(0, remove_path(upper_fo1)); + EXPECT_EQ(0, remove_path(UPPER_WORK "/work")); + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(UPPER_BASE)); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(UPPER_BASE)); + + set_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, umount(MERGE_DATA)); + clear_cap(_metadata, CAP_SYS_ADMIN); + EXPECT_EQ(0, remove_path(MERGE_DATA)); + + cleanup_layout(_metadata); +} + +TEST_F_FORK(layout2_overlay, no_restriction) +{ + ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_fo1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_do1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_do1_fo2, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_do1_fl3, O_RDONLY)); + + ASSERT_EQ(0, test_open(upper_fu1, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_du1, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_du1_fu2, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_fo1, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_do1, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_do1_fo2, O_RDONLY)); + ASSERT_EQ(0, test_open(upper_do1_fu3, O_RDONLY)); + + ASSERT_EQ(0, test_open(merge_fl1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_dl1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_dl1_fl2, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_fu1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_du1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_du1_fu2, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_fo1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_do1, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_do1_fo2, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_do1_fl3, O_RDONLY)); + ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY)); +} + +#define for_each_path(path_list, path_entry, i) \ + for (i = 0, path_entry = *path_list[i]; path_list[i]; \ + path_entry = *path_list[++i]) + +TEST_F_FORK(layout2_overlay, same_content_different_file) +{ + /* Sets access right on parent directories of both layers. */ + const struct rule layer1_base[] = { + { + .path = LOWER_BASE, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = UPPER_BASE, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = MERGE_BASE, + .access = ACCESS_RW, + }, + {} + }; + const struct rule layer2_data[] = { + { + .path = LOWER_DATA, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = UPPER_DATA, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = MERGE_DATA, + .access = ACCESS_RW, + }, + {} + }; + /* Sets access right on directories inside both layers. */ + const struct rule layer3_subdirs[] = { + { + .path = lower_dl1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = lower_do1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_du1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_do1, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = merge_dl1, + .access = ACCESS_RW, + }, + { + .path = merge_du1, + .access = ACCESS_RW, + }, + { + .path = merge_do1, + .access = ACCESS_RW, + }, + {} + }; + /* Tighten access rights to the files. */ + const struct rule layer4_files[] = { + { + .path = lower_dl1_fl2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = lower_do1_fo2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = lower_do1_fl3, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_du1_fu2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_do1_fo2, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = upper_do1_fu3, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + { + .path = merge_dl1_fl2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = merge_du1_fu2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = merge_do1_fo2, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = merge_do1_fl3, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + { + .path = merge_do1_fu3, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + const struct rule layer5_merge_only[] = { + { + .path = MERGE_DATA, + .access = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_WRITE_FILE, + }, + {} + }; + int ruleset_fd; + size_t i; + const char *path_entry; + + /* Sets rules on base directories (i.e. outside overlay scope). */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks lower layer. */ + for_each_path(lower_base_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + for_each_path(lower_base_directories, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(lower_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + /* Checks upper layer. */ + for_each_path(upper_base_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + for_each_path(upper_base_directories, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(upper_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + /* + * Checks that access rights are independent from the lower and upper + * layers: write access to upper files viewed through the merge point + * is still allowed, and write access to lower file viewed (and copied) + * through the merge point is still allowed. + */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + + /* Sets rules on data directories (i.e. inside overlay scope). */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_data); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks merge. */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + + /* Same checks with tighter rules. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_subdirs); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks changes for lower layer. */ + for_each_path(lower_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); + } + /* Checks changes for upper layer. */ + for_each_path(upper_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); + } + /* Checks all merge accesses. */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + + /* Sets rules directly on overlayed files. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_files); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks unchanged accesses on lower layer. */ + for_each_path(lower_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + /* Checks unchanged accesses on upper layer. */ + for_each_path(upper_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); + ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); + } + /* Checks all merge accesses. */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } + + /* Only allowes access to the merge hierarchy. */ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer5_merge_only); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks new accesses on lower layer. */ + for_each_path(lower_sub_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); + } + /* Checks new accesses on upper layer. */ + for_each_path(upper_sub_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); + } + /* Checks all merge accesses. */ + for_each_path(merge_base_files, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); + } + for_each_path(merge_base_directories, path_entry, i) { + ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); + } + for_each_path(merge_sub_files, path_entry, i) { + ASSERT_EQ(0, test_open(path_entry, O_RDWR)); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c new file mode 100644 index 000000000000..15fbef9cc849 --- /dev/null +++ b/tools/testing/selftests/landlock/ptrace_test.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock tests - Ptrace + * + * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> + * Copyright © 2019-2020 ANSSI + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/landlock.h> +#include <signal.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "common.h" + +static void create_domain(struct __test_metadata *const _metadata) +{ + int ruleset_fd; + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK, + }; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + EXPECT_LE(0, ruleset_fd) { + TH_LOG("Failed to create a ruleset: %s", strerror(errno)); + } + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + EXPECT_EQ(0, close(ruleset_fd)); +} + +static int test_ptrace_read(const pid_t pid) +{ + static const char path_template[] = "/proc/%d/environ"; + char procenv_path[sizeof(path_template) + 10]; + int procenv_path_size, fd; + + procenv_path_size = snprintf(procenv_path, sizeof(procenv_path), + path_template, pid); + if (procenv_path_size >= sizeof(procenv_path)) + return E2BIG; + + fd = open(procenv_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return errno; + /* + * Mixing error codes from close(2) and open(2) should not lead to any + * (access type) confusion for this test. + */ + if (close(fd) != 0) + return errno; + return 0; +} + +FIXTURE(hierarchy) { }; + +FIXTURE_VARIANT(hierarchy) { + const bool domain_both; + const bool domain_parent; + const bool domain_child; +}; + +/* + * Test multiple tracing combinations between a parent process P1 and a child + * process P2. + * + * Yama's scoped ptrace is presumed disabled. If enabled, this optional + * restriction is enforced in addition to any Landlock check, which means that + * all P2 requests to trace P1 would be denied. + */ + +/* + * No domain + * + * P1-. P1 -> P2 : allow + * \ P2 -> P1 : allow + * 'P2 + */ +FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) { + .domain_both = false, + .domain_parent = false, + .domain_child = false, +}; + +/* + * Child domain + * + * P1--. P1 -> P2 : allow + * \ P2 -> P1 : deny + * .'-----. + * | P2 | + * '------' + */ +FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) { + .domain_both = false, + .domain_parent = false, + .domain_child = true, +}; + +/* + * Parent domain + * .------. + * | P1 --. P1 -> P2 : deny + * '------' \ P2 -> P1 : allow + * ' + * P2 + */ +FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) { + .domain_both = false, + .domain_parent = true, + .domain_child = false, +}; + +/* + * Parent + child domain (siblings) + * .------. + * | P1 ---. P1 -> P2 : deny + * '------' \ P2 -> P1 : deny + * .---'--. + * | P2 | + * '------' + */ +FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) { + .domain_both = false, + .domain_parent = true, + .domain_child = true, +}; + +/* + * Same domain (inherited) + * .-------------. + * | P1----. | P1 -> P2 : allow + * | \ | P2 -> P1 : allow + * | ' | + * | P2 | + * '-------------' + */ +FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) { + .domain_both = true, + .domain_parent = false, + .domain_child = false, +}; + +/* + * Inherited + child domain + * .-----------------. + * | P1----. | P1 -> P2 : allow + * | \ | P2 -> P1 : deny + * | .-'----. | + * | | P2 | | + * | '------' | + * '-----------------' + */ +FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) { + .domain_both = true, + .domain_parent = false, + .domain_child = true, +}; + +/* + * Inherited + parent domain + * .-----------------. + * |.------. | P1 -> P2 : deny + * || P1 ----. | P2 -> P1 : allow + * |'------' \ | + * | ' | + * | P2 | + * '-----------------' + */ +FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) { + .domain_both = true, + .domain_parent = true, + .domain_child = false, +}; + +/* + * Inherited + parent and child domain (siblings) + * .-----------------. + * | .------. | P1 -> P2 : deny + * | | P1 . | P2 -> P1 : deny + * | '------'\ | + * | \ | + * | .--'---. | + * | | P2 | | + * | '------' | + * '-----------------' + */ +FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) { + .domain_both = true, + .domain_parent = true, + .domain_child = true, +}; + +FIXTURE_SETUP(hierarchy) +{ } + +FIXTURE_TEARDOWN(hierarchy) +{ } + +/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */ +TEST_F(hierarchy, trace) +{ + pid_t child, parent; + int status, err_proc_read; + int pipe_child[2], pipe_parent[2]; + char buf_parent; + long ret; + + /* + * Removes all effective and permitted capabilities to not interfere + * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS. + */ + drop_caps(_metadata); + + parent = getpid(); + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + if (variant->domain_both) { + create_domain(_metadata); + if (!_metadata->passed) + /* Aborts before forking. */ + return; + } + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + char buf_child; + + ASSERT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, close(pipe_child[0])); + if (variant->domain_child) + create_domain(_metadata); + + /* Waits for the parent to be in a domain, if any. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); + + /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */ + err_proc_read = test_ptrace_read(parent); + ret = ptrace(PTRACE_ATTACH, parent, NULL, 0); + if (variant->domain_child) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + EXPECT_EQ(EACCES, err_proc_read); + } else { + EXPECT_EQ(0, ret); + EXPECT_EQ(0, err_proc_read); + } + if (ret == 0) { + ASSERT_EQ(parent, waitpid(parent, &status, 0)); + ASSERT_EQ(1, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_DETACH, parent, NULL, 0)); + } + + /* Tests child PTRACE_TRACEME. */ + ret = ptrace(PTRACE_TRACEME); + if (variant->domain_parent) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + } else { + EXPECT_EQ(0, ret); + } + + /* + * Signals that the PTRACE_ATTACH test is done and the + * PTRACE_TRACEME test is ongoing. + */ + ASSERT_EQ(1, write(pipe_child[1], ".", 1)); + + if (!variant->domain_parent) { + ASSERT_EQ(0, raise(SIGSTOP)); + } + + /* Waits for the parent PTRACE_ATTACH test. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); + _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + return; + } + + ASSERT_EQ(0, close(pipe_child[1])); + ASSERT_EQ(0, close(pipe_parent[0])); + if (variant->domain_parent) + create_domain(_metadata); + + /* Signals that the parent is in a domain, if any. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + + /* + * Waits for the child to test PTRACE_ATTACH on the parent and start + * testing PTRACE_TRACEME. + */ + ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* Tests child PTRACE_TRACEME. */ + if (!variant->domain_parent) { + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0)); + } else { + /* The child should not be traced by the parent. */ + EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0)); + EXPECT_EQ(ESRCH, errno); + } + + /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */ + err_proc_read = test_ptrace_read(child); + ret = ptrace(PTRACE_ATTACH, child, NULL, 0); + if (variant->domain_parent) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + EXPECT_EQ(EACCES, err_proc_read); + } else { + EXPECT_EQ(0, ret); + EXPECT_EQ(0, err_proc_read); + } + if (ret == 0) { + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0)); + } + + /* Signals that the parent PTRACE_ATTACH test is done. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + ASSERT_EQ(child, waitpid(child, &status, 0)); + if (WIFSIGNALED(status) || !WIFEXITED(status) || + WEXITSTATUS(status) != EXIT_SUCCESS) + _metadata->passed = 0; +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/true.c b/tools/testing/selftests/landlock/true.c new file mode 100644 index 000000000000..3f9ccbf52783 --- /dev/null +++ b/tools/testing/selftests/landlock/true.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +int main(void) +{ + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index a5ce26d548e4..0af84ad48aa7 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -1,6 +1,10 @@ # This mimics the top-level Makefile. We do it explicitly here so that this # Makefile can operate with or without the kbuild infrastructure. +ifneq ($(LLVM),) +CC := clang +else CC := $(CROSS_COMPILE)gcc +endif ifeq (0,$(MAKELEVEL)) ifeq ($(OUTPUT),) @@ -74,7 +78,8 @@ ifdef building_out_of_srctree rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \ fi @if [ "X$(TEST_PROGS)" != "X" ]; then \ - $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \ + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \ + $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \ else \ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \ fi diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore index f26212605b6b..d4b0be857deb 100644 --- a/tools/testing/selftests/lkdtm/.gitignore +++ b/tools/testing/selftests/lkdtm/.gitignore @@ -1,2 +1,3 @@ *.sh !run.sh +!stack-entropy.sh diff --git a/tools/testing/selftests/lkdtm/Makefile b/tools/testing/selftests/lkdtm/Makefile index 1bcc9ee990eb..c71109ceeb2d 100644 --- a/tools/testing/selftests/lkdtm/Makefile +++ b/tools/testing/selftests/lkdtm/Makefile @@ -5,6 +5,7 @@ include ../lib.mk # NOTE: $(OUTPUT) won't get default value if used before lib.mk TEST_FILES := tests.txt +TEST_PROGS := stack-entropy.sh TEST_GEN_PROGS = $(patsubst %,$(OUTPUT)/%.sh,$(shell awk '{print $$1}' tests.txt | sed -e 's/\#//')) all: $(TEST_GEN_PROGS) diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh new file mode 100755 index 000000000000..b1b8a5097cbb --- /dev/null +++ b/tools/testing/selftests/lkdtm/stack-entropy.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test. +set -e +samples="${1:-1000}" + +# Capture dmesg continuously since it may fill up depending on sample size. +log=$(mktemp -t stack-entropy-XXXXXX) +dmesg --follow >"$log" & pid=$! +report=-1 +for i in $(seq 1 $samples); do + echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT + if [ -t 1 ]; then + percent=$(( 100 * $i / $samples )) + if [ "$percent" -ne "$report" ]; then + /bin/echo -en "$percent%\r" + report="$percent" + fi + fi +done +kill "$pid" + +# Count unique offsets since last run. +seen=$(tac "$log" | grep -m1 -B"$samples"0 'Starting stack offset' | \ + grep 'Stack offset' | awk '{print $NF}' | sort | uniq -c | wc -l) +bits=$(echo "obase=2; $seen" | bc | wc -L) +echo "Bits of stack entropy: $bits" +rm -f "$log" + +# We would expect any functional stack randomization to be at least 5 bits. +if [ "$bits" -lt 5 ]; then + exit 1 +else + exit 0 +fi diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 25f198bec0b2..3915bb7bfc39 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -23,6 +23,8 @@ TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh TEST_PROGS += unicast_extensions.sh +TEST_PROGS += udpgro_fwd.sh +TEST_PROGS += veth.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -37,6 +39,8 @@ TEST_GEN_FILES += ipsec TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_FILES := settings + KSFT_KHDR_INSTALL := 1 include ../lib.mk diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d98fb85e201c..49774a8a7736 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -19,10 +19,39 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture" -IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture" - -ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}" +IPV4_TESTS=" + ipv4_fcnal + ipv4_grp_fcnal + ipv4_res_grp_fcnal + ipv4_withv6_fcnal + ipv4_fcnal_runtime + ipv4_large_grp + ipv4_large_res_grp + ipv4_compat_mode + ipv4_fdb_grp_fcnal + ipv4_torture + ipv4_res_torture +" + +IPV6_TESTS=" + ipv6_fcnal + ipv6_grp_fcnal + ipv6_res_grp_fcnal + ipv6_fcnal_runtime + ipv6_large_grp + ipv6_large_res_grp + ipv6_compat_mode + ipv6_fdb_grp_fcnal + ipv6_torture + ipv6_res_torture +" + +ALL_TESTS=" + basic + basic_res + ${IPV4_TESTS} + ${IPV6_TESTS} +" TESTS="${ALL_TESTS}" VERBOSE=0 PAUSE_ON_FAIL=no @@ -232,6 +261,19 @@ check_nexthop() check_output "${out}" "${expected}" } +check_nexthop_bucket() +{ + local nharg="$1" + local expected="$2" + local out + + # remove the idle time since we cannot match it + out=$($IP nexthop bucket ${nharg} \ + | sed s/idle_time\ [0-9.]*\ // 2>/dev/null) + + check_output "${out}" "${expected}" +} + check_route() { local pfx="$1" @@ -308,6 +350,25 @@ check_large_grp() log_test $? 0 "Dump large (x$ecmp) ecmp groups" } +check_large_res_grp() +{ + local ipv=$1 + local buckets=$2 + local ipstr="" + + if [ $ipv -eq 4 ]; then + ipstr="172.16.1.2" + else + ipstr="2001:db8:91::2" + fi + + # create a resilient group with $buckets buckets and dump them + run_cmd "$IP nexthop add id 100 via $ipstr dev veth1" + run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets" + run_cmd "$IP nexthop bucket list" + log_test $? 0 "Dump large (x$buckets) nexthop buckets" +} + start_ip_monitor() { local mtype=$1 @@ -344,6 +405,15 @@ check_nexthop_fdb_support() fi } +check_nexthop_res_support() +{ + $IP nexthop help 2>&1 | grep -q resilient + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing resilient nexthop group support" + return $ksft_skip + fi +} + ipv6_fdb_grp_fcnal() { local rc @@ -666,6 +736,70 @@ ipv6_grp_fcnal() log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" } +ipv6_res_grp_fcnal() +{ + local rc + + echo + echo "IPv6 resilient groups functional" + echo "--------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # + # migration of nexthop buckets - equal weights + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0" + + run_cmd "$IP nexthop del id 63" + check_nexthop "id 102" \ + "id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 62 id 102 index 1 nhid 62" + log_test $? 0 "Nexthop buckets updated when entry is deleted" + + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 63 id 102 index 1 nhid 62" + log_test $? 0 "Nexthop buckets updated after replace" + + $IP nexthop flush >/dev/null 2>&1 + + # + # migration of nexthop buckets - unequal weights + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0" + + run_cmd "$IP nexthop del id 63" + check_nexthop "id 102" \ + "id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" + log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP" + + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" + log_test $? 0 "Nexthop buckets updated after replace - nECMP" +} + ipv6_fcnal_runtime() { local rc @@ -824,6 +958,22 @@ ipv6_large_grp() $IP nexthop flush >/dev/null 2>&1 } +ipv6_large_res_grp() +{ + echo + echo "IPv6 large resilient group (128k buckets)" + echo "-----------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + check_large_res_grp 6 $((128 * 1024)) + + $IP nexthop flush >/dev/null 2>&1 +} + ipv6_del_add_loop1() { while :; do @@ -874,11 +1024,67 @@ ipv6_torture() sleep 300 kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null # if we did not crash, success log_test 0 0 "IPv6 torture test" } +ipv6_res_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 type resilient + done >/dev/null 2>&1 +} + +ipv6_res_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv6 runtime resilient nexthop group torture" + echo "--------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0" + run_cmd "$IP route add 2001:db8:101::1 nhid 102" + run_cmd "$IP route add 2001:db8:101::2 nhid 102" + + ipv6_del_add_loop1 & + pid1=$! + ipv6_res_grp_replace_loop & + pid2=$! + ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + pid3=$! + ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + pid4=$! + ip netns exec me mausezahn -6 veth1 \ + -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \ + -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null + + # if we did not crash, success + log_test 0 0 "IPv6 resilient nexthop group torture test" +} ipv4_fcnal() { @@ -1038,6 +1244,70 @@ ipv4_grp_fcnal() log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" } +ipv4_res_grp_fcnal() +{ + local rc + + echo + echo "IPv4 resilient groups functional" + echo "--------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # + # migration of nexthop buckets - equal weights + # + run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0" + + run_cmd "$IP nexthop del id 13" + check_nexthop "id 102" \ + "id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 12 id 102 index 1 nhid 12" + log_test $? 0 "Nexthop buckets updated when entry is deleted" + + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 13 id 102 index 1 nhid 12" + log_test $? 0 "Nexthop buckets updated after replace" + + $IP nexthop flush >/dev/null 2>&1 + + # + # migration of nexthop buckets - unequal weights + # + run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0" + + run_cmd "$IP nexthop del id 13" + check_nexthop "id 102" \ + "id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12" + log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP" + + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12" + log_test $? 0 "Nexthop buckets updated after replace - nECMP" +} + ipv4_withv6_fcnal() { local lladdr @@ -1259,6 +1529,22 @@ ipv4_large_grp() $IP nexthop flush >/dev/null 2>&1 } +ipv4_large_res_grp() +{ + echo + echo "IPv4 large resilient group (128k buckets)" + echo "-----------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + check_large_res_grp 4 $((128 * 1024)) + + $IP nexthop flush >/dev/null 2>&1 +} + sysctl_nexthop_compat_mode_check() { local sysctlname="net.ipv4.nexthop_compat_mode" @@ -1476,11 +1762,68 @@ ipv4_torture() sleep 300 kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null # if we did not crash, success log_test 0 0 "IPv4 torture test" } +ipv4_res_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 type resilient + done >/dev/null 2>&1 +} + +ipv4_res_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv4 runtime resilient nexthop group torture" + echo "--------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0" + run_cmd "$IP route add 172.16.101.1 nhid 102" + run_cmd "$IP route add 172.16.101.2 nhid 102" + + ipv4_del_add_loop1 & + pid1=$! + ipv4_res_grp_replace_loop & + pid2=$! + ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + pid3=$! + ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + pid4=$! + ip netns exec me mausezahn veth1 \ + -B 172.16.101.2 -A 172.16.1.1 -c 0 \ + -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null + + # if we did not crash, success + log_test 0 0 "IPv4 resilient nexthop group torture test" +} + basic() { echo @@ -1590,6 +1933,219 @@ basic() log_test $? 2 "Nexthop group and blackhole" $IP nexthop flush >/dev/null 2>&1 + + # Test to ensure that flushing with a multi-part nexthop dump works as + # expected. + local batch_file=$(mktemp) + + for i in $(seq 1 $((64 * 1024))); do + echo "nexthop add id $i blackhole" >> $batch_file + done + + $IP -b $batch_file + $IP nexthop flush >/dev/null 2>&1 + [[ $($IP nexthop | wc -l) -eq 0 ]] + log_test $? 0 "Large scale nexthop flushing" + + rm $batch_file +} + +check_nexthop_buckets_balance() +{ + local nharg=$1; shift + local ret + + while (($# > 0)); do + local selector=$1; shift + local condition=$1; shift + local count + + count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length) + (( $count $condition )) + ret=$? + if ((ret != 0)); then + return $ret + fi + done + + return 0 +} + +basic_res() +{ + echo + echo "Basic resilient nexthop group functional tests" + echo "----------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + run_cmd "$IP nexthop add id 1 dev veth1" + + # + # resilient nexthop group addition + # + + run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8" + log_test $? 0 "Add a nexthop group with default parameters" + + run_cmd "$IP nexthop get id 101" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Get a nexthop group with default parameters" + + run_cmd "$IP nexthop add id 102 group 1 type resilient + buckets 4 idle_timer 100 unbalanced_timer 5" + run_cmd "$IP nexthop get id 102" + check_nexthop "id 102" \ + "id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0" + log_test $? 0 "Get a nexthop group with non-default parameters" + + run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0" + log_test $? 2 "Add a nexthop group with 0 buckets" + + # + # resilient nexthop group replacement + # + + run_cmd "$IP nexthop replace id 101 group 1 type resilient + buckets 8 idle_timer 240 unbalanced_timer 80" + log_test $? 0 "Replace nexthop group parameters" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing parameters" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512" + log_test $? 0 "Replace idle timer" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing idle timer" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256" + log_test $? 0 "Replace unbalanced timer" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing unbalanced timer" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient" + log_test $? 0 "Replace with no parameters" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing no parameters" + + run_cmd "$IP nexthop replace id 101 group 1" + log_test $? 2 "Replace nexthop group type - implicit" + + run_cmd "$IP nexthop replace id 101 group 1 type mpath" + log_test $? 2 "Replace nexthop group type - explicit" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024" + log_test $? 2 "Replace number of nexthop buckets" + + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing with invalid parameters" + + # + # resilient nexthop buckets dump + # + + $IP nexthop flush >/dev/null 2>&1 + run_cmd "$IP nexthop add id 1 dev veth1" + run_cmd "$IP nexthop add id 2 dev veth3" + run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4" + run_cmd "$IP nexthop add id 201 group 1/2" + + check_nexthop_bucket "" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets" + + check_nexthop_bucket "list id 101" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets in a group" + + (( $($IP -j nexthop bucket list id 101 | + jq '[.[] | select(.bucket.idle_time > 0 and + .bucket.idle_time < 2)] | length') == 4 )) + log_test $? 0 "All nexthop buckets report a positive near-zero idle time" + + check_nexthop_bucket "list dev veth1" \ + "id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets with a specific nexthop device" + + check_nexthop_bucket "list nhid 2" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2" + log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier" + + run_cmd "$IP nexthop bucket list id 111" + log_test $? 2 "Dump all nexthop buckets in a non-existent group" + + run_cmd "$IP nexthop bucket list id 201" + log_test $? 2 "Dump all nexthop buckets in a non-resilient group" + + run_cmd "$IP nexthop bucket list dev bla" + log_test $? 255 "Dump all nexthop buckets using a non-existent device" + + run_cmd "$IP nexthop bucket list groups" + log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword" + + run_cmd "$IP nexthop bucket list fdb" + log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword" + + # + # resilient nexthop buckets get requests + # + + check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2" + log_test $? 0 "Get a valid nexthop bucket" + + run_cmd "$IP nexthop bucket get id 101 index 999" + log_test $? 2 "Get a nexthop bucket with valid group, but invalid index" + + run_cmd "$IP nexthop bucket get id 201 index 0" + log_test $? 2 "Get a nexthop bucket from a non-resilient group" + + run_cmd "$IP nexthop bucket get id 999 index 0" + log_test $? 2 "Get a nexthop bucket from a non-existent group" + + # + # tests for bucket migration + # + + $IP nexthop flush >/dev/null 2>&1 + + run_cmd "$IP nexthop add id 1 dev veth1" + run_cmd "$IP nexthop add id 2 dev veth3" + run_cmd "$IP nexthop add id 101 + group 1/2 type resilient buckets 10 + idle_timer 1 unbalanced_timer 20" + + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 5" \ + "nhid 2" "== 5" + log_test $? 0 "Initial bucket allocation" + + run_cmd "$IP nexthop replace id 101 + group 1,2/2,3 type resilient" + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 4" \ + "nhid 2" "== 6" + log_test $? 0 "Bucket allocation after replace" + + # Check that increase in idle timer does not make buckets appear busy. + run_cmd "$IP nexthop replace id 101 + group 1,2/2,3 type resilient + idle_timer 10" + run_cmd "$IP nexthop replace id 101 + group 1/2 type resilient" + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 5" \ + "nhid 2" "== 5" + log_test $? 0 "Buckets migrated after idle timer change" + + $IP nexthop flush >/dev/null 2>&1 } ################################################################################ diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 2b5707738609..76d9487fb03c 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,7 +9,7 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr" +TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle" VERBOSE=0 PAUSE_ON_FAIL=no @@ -1653,6 +1653,154 @@ ipv4_route_v6_gw_test() route_cleanup } +socat_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + return 0 +} + +iptables_check() +{ + iptables -t mangle -L OUTPUT &> /dev/null + if [ $? -ne 0 ]; then + echo "iptables configuration not supported. Skipping test" + return 1 + fi + + return 0 +} + +ip6tables_check() +{ + ip6tables -t mangle -L OUTPUT &> /dev/null + if [ $? -ne 0 ]; then + echo "ip6tables configuration not supported. Skipping test" + return 1 + fi + + return 0 +} + +ipv4_mangle_test() +{ + local rc + + echo + echo "IPv4 mangling tests" + + socat_check || return 1 + iptables_check || return 1 + + route_setup + sleep 2 + + local tmp_file=$(mktemp) + ip netns exec ns2 socat UDP4-LISTEN:54321,fork $tmp_file & + + # Add a FIB rule and a route that will direct our connection to the + # listening server. + $IP rule add pref 100 ipproto udp sport 12345 dport 54321 table 123 + $IP route add table 123 172.16.101.0/24 dev veth1 + + # Add an unreachable route to the main table that will block our + # connection in case the FIB rule is not hit. + $IP route add unreachable 172.16.101.2/32 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters" + + run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=11111" + log_test $? 1 " Connection with incorrect parameters" + + # Add a mangling rule and make sure connection is still successful. + $NS_EXEC iptables -t mangle -A OUTPUT -j MARK --set-mark 1 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters - mangling" + + # Delete the mangling rule and make sure connection is still + # successful. + $NS_EXEC iptables -t mangle -D OUTPUT -j MARK --set-mark 1 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters - no mangling" + + # Verify connections were indeed successful on server side. + [[ $(cat $tmp_file | wc -l) -eq 3 ]] + log_test $? 0 " Connection check - server side" + + $IP route del unreachable 172.16.101.2/32 + $IP route del table 123 172.16.101.0/24 dev veth1 + $IP rule del pref 100 + + { kill %% && wait %%; } 2>/dev/null + rm $tmp_file + + route_cleanup +} + +ipv6_mangle_test() +{ + local rc + + echo + echo "IPv6 mangling tests" + + socat_check || return 1 + ip6tables_check || return 1 + + route_setup + sleep 2 + + local tmp_file=$(mktemp) + ip netns exec ns2 socat UDP6-LISTEN:54321,fork $tmp_file & + + # Add a FIB rule and a route that will direct our connection to the + # listening server. + $IP -6 rule add pref 100 ipproto udp sport 12345 dport 54321 table 123 + $IP -6 route add table 123 2001:db8:101::/64 dev veth1 + + # Add an unreachable route to the main table that will block our + # connection in case the FIB rule is not hit. + $IP -6 route add unreachable 2001:db8:101::2/128 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters" + + run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=11111" + log_test $? 1 " Connection with incorrect parameters" + + # Add a mangling rule and make sure connection is still successful. + $NS_EXEC ip6tables -t mangle -A OUTPUT -j MARK --set-mark 1 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters - mangling" + + # Delete the mangling rule and make sure connection is still + # successful. + $NS_EXEC ip6tables -t mangle -D OUTPUT -j MARK --set-mark 1 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters - no mangling" + + # Verify connections were indeed successful on server side. + [[ $(cat $tmp_file | wc -l) -eq 3 ]] + log_test $? 0 " Connection check - server side" + + $IP -6 route del unreachable 2001:db8:101::2/128 + $IP -6 route del table 123 2001:db8:101::/64 dev veth1 + $IP -6 rule del pref 100 + + { kill %% && wait %%; } 2>/dev/null + rm $tmp_file + + route_cleanup +} + ################################################################################ # usage @@ -1725,6 +1873,8 @@ do ipv6_route_metrics) ipv6_route_metrics_test;; ipv4_route_metrics) ipv4_route_metrics_test;; ipv4_route_v6_gw) ipv4_route_v6_gw_test;; + ipv4_mangle) ipv4_mangle_test;; + ipv6_mangle) ipv6_mangle_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh new file mode 100755 index 000000000000..5148d97a5df8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh @@ -0,0 +1,366 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + h1.10 | | + h2.20 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | + $h1 | | + $h2 | +# | | | | | | +# +----|---------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|--------------------+ +# | SW | | | +# | +--|-------------------------------+ +----------------|------------------+ | +# | | + $swp1 BR1 (802.1ad) | | BR2 (802.1d) + $swp2 | | +# | | vid 100 pvid untagged | | | | | +# | | | | + $swp2.20 | | +# | | | | | | +# | | + vx100 (vxlan) | | + vx200 (vxlan) | | +# | | local 192.0.2.17 | | local 192.0.2.17 | | +# | | remote 192.0.2.34 | | remote 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | id 2000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | +# | +--------------------------------- + +-----------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|-----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR3 (802.1ad) | | | | BR3 (802.1d) | | +# | | + vx100 (vxlan) | | | | + vx200 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | | | +# | | | | | | + w1.20 | | +# | | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 100 pvid untagged | | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | | | | | | | | | +# | | | | | | | | | | +# | | + w2.10 | | | | + w2.20 | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 +} + +h1_destroy() +{ + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 20 v$h2 192.0.2.2/28 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + #### BR1 #### + ip link add name br1 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + #### BR2 #### + ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br2 address $(mac_get $swp2) + ip link set dev br2 up + + ip link set dev $rp1 up + rp1_set_addr + + #### VX100 #### + ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev vx100 master br1 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 100 dev $swp1 pvid untagged + + #### VX200 #### + ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx200 up + + ip link set dev vx200 master br2 + + ip link set dev $swp2 up + ip link add name $swp2.20 link $swp2 type vlan id 20 + ip link set dev $swp2.20 master br2 + ip link set dev $swp2.20 up + + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx200 nomaster + ip link set dev vx200 down + ip link del dev vx200 + + ip link del dev $swp2.20 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 100 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx100 nomaster + ip link set dev vx100 down + ip link del dev vx100 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br2 down + ip link del dev br2 + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local vxlan_name=$1; shift + local vxlan_id=$1; shift + local vlan_id=$1; shift + local host_addr=$1; shift + local nh_addr=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br3 type bridge vlan_filtering 0 + ip link set dev br3 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br3 + ip link set dev w1 up + + ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \ + dstport "$VXPORT" + ip link set dev $vxlan_name up + bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev $vxlan_name master br3 + tc qdisc add dev $vxlan_name clsact + + simple_if_init w2 + vlan_create w2 $vlan_id vw2 $host_addr/28 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \ + 192.0.2.33 + + in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \ + 192.0.2.49 + + in_ns ns2 ip link add name w1.20 link w1 type vlan id 20 + in_ns ns2 ip link set dev w1.20 master br3 + in_ns ns2 ip link set dev w1.20 up +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge" + ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge" +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh index 66496659bea7..e134a5f529c9 100644 --- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh +++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh @@ -224,7 +224,7 @@ fib_ipv4_plen_test() ip -n $ns link set dev dummy1 up # Add two routes with the same key and different prefix length and - # make sure both are in hardware. It can be verfied that both are + # make sure both are in hardware. It can be verified that both are # sharing the same leaf by checking the /proc/net/fib_trie ip -n $ns route add 192.0.2.0/24 dev dummy1 ip -n $ns route add 192.0.2.0/25 dev dummy1 diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh new file mode 100755 index 000000000000..088b65e64d66 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh @@ -0,0 +1,361 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test traffic distribution when a wECMP route forwards traffic to two GRE +# tunnels. +# +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|------------------------+ +# | SW1 | | +# | $ol1 + | +# | 192.0.2.2/28 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (gre) + g1b (gre) | +# | loc=192.0.2.65 loc=192.0.2.81 | +# | rem=192.0.2.66 --. rem=192.0.2.82 --. | +# | tos=inherit | tos=inherit | | +# | .------------------' | | +# | | .------------------' | +# | v v | +# | + $ul1.111 (vlan) + $ul1.222 (vlan) | +# | | 192.0.2.129/28 | 192.0.2.145/28 | +# | \ / | +# | \________________/ | +# | | | +# | + $ul1 | +# +------------|-------------------------------+ +# | +# +------------|-------------------------------+ +# | SW2 + $ul2 | +# | _______|________ | +# | / \ | +# | / \ | +# | + $ul2.111 (vlan) + $ul2.222 (vlan) | +# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 | +# | | | | +# | | '------------------. | +# | '------------------. | | +# | + g2a (gre) | + g2b (gre) | | +# | loc=192.0.2.66 | loc=192.0.2.82 | | +# | rem=192.0.2.65 --' rem=192.0.2.81 --' | +# | tos=inherit tos=inherit | +# | | +# | $ol2 + | +# | 192.0.2.17/28 | | +# | 2001:db8:2::1/64 | | +# +-------------------|------------------------+ +# | +# +-------------------|-----+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# | 2001:db8:2::2/64 | +# +-------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_ipv4 + multipath_ipv6 + multipath_ipv6_l4 +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 + ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 +} + +sw1_create() +{ + simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $ul1 v$ol1 + vlan_create $ul1 111 v$ol1 192.0.2.129/28 + vlan_create $ul1 222 v$ol1 192.0.2.145/28 + + tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1 + __simple_if_init g1a v$ol1 192.0.2.65/32 + ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + + tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1 + __simple_if_init g1b v$ol1 192.0.2.81/32 + ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + + ip -6 nexthop add id 101 dev g1a + ip -6 nexthop add id 102 dev g1b + ip nexthop add id 103 group 101/102 type resilient buckets 512 \ + idle_timer 0 + + ip route add vrf v$ol1 192.0.2.16/28 nhid 103 + ip route add vrf v$ol1 2001:db8:2::/64 nhid 103 +} + +sw1_destroy() +{ + ip route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 192.0.2.16/28 + + ip nexthop del id 103 + ip -6 nexthop del id 102 + ip -6 nexthop del id 101 + + ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + __simple_if_fini g1b 192.0.2.81/32 + tunnel_destroy g1b + + ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + __simple_if_fini g1a 192.0.2.65/32 + tunnel_destroy g1a + + vlan_destroy $ul1 222 + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64 +} + +sw2_create() +{ + simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 192.0.2.130/28 + vlan_create $ul2 222 v$ol2 192.0.2.146/28 + + tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 + __simple_if_init g2a v$ol2 192.0.2.66/32 + ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + + tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2 + __simple_if_init g2b v$ol2 192.0.2.82/32 + ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + + ip -6 nexthop add id 201 dev g2a + ip -6 nexthop add id 202 dev g2b + ip nexthop add id 203 group 201/202 type resilient buckets 512 \ + idle_timer 0 + + ip route add vrf v$ol2 192.0.2.0/28 nhid 203 + ip route add vrf v$ol2 2001:db8:1::/64 nhid 203 + + tc qdisc add dev $ul2 clsact + tc filter add dev $ul2 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul2 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw2_destroy() +{ + tc qdisc del dev $ul2 clsact + + ip route del vrf v$ol2 2001:db8:1::/64 + ip route del vrf v$ol2 192.0.2.0/28 + + ip nexthop del id 203 + ip -6 nexthop del id 202 + ip -6 nexthop del id 201 + + ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + __simple_if_fini g2b 192.0.2.82/32 + tunnel_destroy g2b + + ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + __simple_if_fini g2a 192.0.2.66/32 + tunnel_destroy g2a + + vlan_destroy $ul2 222 + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 + ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +multipath4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ + type resilient + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 type resilient + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 0 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ + type resilient + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + # Generate 16384 echo requests, each with a random flow label. + for ((i=0; i < 16384; ++i)); do + ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null + done + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 type resilient + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +multipath6_l4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ + type resilient + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 type resilient + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +multipath_ipv4() +{ + log_info "Running IPv4 multipath tests" + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6() +{ + log_info "Running IPv6 multipath tests" + multipath6_test "ECMP" 1 1 + multipath6_test "Weighted MP 2:1" 2 1 + multipath6_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6_l4() +{ + log_info "Running IPv6 L4 hash multipath tests" + multipath6_l4_test "ECMP" 1 1 + multipath6_l4_test "Weighted MP 2:1" 2 1 + multipath6_l4_test "Weighted MP 11:45" 11 45 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index be71012b8fc5..42e28c983d41 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -353,6 +353,11 @@ wait_for_offload() "$@" | grep -q offload } +wait_for_trap() +{ + "$@" | grep -q trap +} + until_counter_is() { local expr=$1; shift @@ -767,6 +772,15 @@ rate() echo $((8 * (t1 - t0) / interval)) } +packets_rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $(((t1 - t0) / interval)) +} + mac_get() { local if_name=$1 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index c02291e9841e..880e3ab9d088 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -271,7 +271,7 @@ test_span_gre_fdb_roaming() while ((RET == 0)); do bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null - bridge fdb add dev $swp2 $h3mac vlan 555 master + bridge fdb add dev $swp2 $h3mac vlan 555 master static sleep 1 fail_test_span_gre_dir $tundev ingress diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 13db1cb50e57..6406cd76a19d 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -20,6 +20,13 @@ mirror_uninstall() tc filter del dev $swp1 $direction pref 1000 } +is_ipv6() +{ + local addr=$1; shift + + [[ -z ${addr//[0-9a-fA-F:]/} ]] +} + mirror_test() { local vrf_name=$1; shift @@ -29,9 +36,17 @@ mirror_test() local pref=$1; shift local expect=$1; shift + if is_ipv6 $dip; then + local proto=-6 + local type="icmp6 type=128" # Echo request. + else + local proto= + local type="icmp echoreq" + fi + local t0=$(tc_rule_stats_get $dev $pref) - $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ - -c 10 -d 100msec -t icmp type=8 + $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ + -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh new file mode 100755 index 000000000000..4898dd4118f1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -0,0 +1,400 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_test +" +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router1_create() +{ + vrf_create "vrf-r1" + ip link set dev $rp11 master vrf-r1 + ip link set dev $rp12 master vrf-r1 + ip link set dev $rp13 master vrf-r1 + + ip link set dev vrf-r1 up + ip link set dev $rp11 up + ip link set dev $rp12 up + ip link set dev $rp13 up + + ip address add 192.0.2.1/24 dev $rp11 + ip address add 2001:db8:1::1/64 dev $rp11 + + ip address add 169.254.2.12/24 dev $rp12 + ip address add fe80:2::12/64 dev $rp12 + + ip address add 169.254.3.13/24 dev $rp13 + ip address add fe80:3::13/64 dev $rp13 +} + +router1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-r1 + ip route del 198.51.100.0/24 vrf vrf-r1 + + ip address del fe80:3::13/64 dev $rp13 + ip address del 169.254.3.13/24 dev $rp13 + + ip address del fe80:2::12/64 dev $rp12 + ip address del 169.254.2.12/24 dev $rp12 + + ip address del 2001:db8:1::1/64 dev $rp11 + ip address del 192.0.2.1/24 dev $rp11 + + ip nexthop del id 103 + ip nexthop del id 101 + ip nexthop del id 102 + ip nexthop del id 106 + ip nexthop del id 104 + ip nexthop del id 105 + + ip link set dev $rp13 down + ip link set dev $rp12 down + ip link set dev $rp11 down + + vrf_destroy "vrf-r1" +} + +router2_create() +{ + vrf_create "vrf-r2" + ip link set dev $rp21 master vrf-r2 + ip link set dev $rp22 master vrf-r2 + ip link set dev $rp23 master vrf-r2 + + ip link set dev vrf-r2 up + ip link set dev $rp21 up + ip link set dev $rp22 up + ip link set dev $rp23 up + + ip address add 198.51.100.1/24 dev $rp21 + ip address add 2001:db8:2::1/64 dev $rp21 + + ip address add 169.254.2.22/24 dev $rp22 + ip address add fe80:2::22/64 dev $rp22 + + ip address add 169.254.3.23/24 dev $rp23 + ip address add fe80:3::23/64 dev $rp23 +} + +router2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-r2 + ip route del 192.0.2.0/24 vrf vrf-r2 + + ip address del fe80:3::23/64 dev $rp23 + ip address del 169.254.3.23/24 dev $rp23 + + ip address del fe80:2::22/64 dev $rp22 + ip address del 169.254.2.22/24 dev $rp22 + + ip address del 2001:db8:2::1/64 dev $rp21 + ip address del 198.51.100.1/24 dev $rp21 + + ip nexthop del id 201 + ip nexthop del id 202 + ip nexthop del id 204 + ip nexthop del id 205 + + ip link set dev $rp23 down + ip link set dev $rp22 down + ip link set dev $rp21 down + + vrf_destroy "vrf-r2" +} + +routing_nh_obj() +{ + ip nexthop add id 101 via 169.254.2.22 dev $rp12 + ip nexthop add id 102 via 169.254.3.23 dev $rp13 + ip nexthop add id 103 group 101/102 type resilient buckets 512 \ + idle_timer 0 + ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103 + + ip nexthop add id 104 via fe80:2::22 dev $rp12 + ip nexthop add id 105 via fe80:3::23 dev $rp13 + ip nexthop add id 106 group 104/105 type resilient buckets 512 \ + idle_timer 0 + ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106 + + ip nexthop add id 201 via 169.254.2.12 dev $rp22 + ip nexthop add id 202 via 169.254.3.13 dev $rp23 + ip nexthop add id 203 group 201/202 type resilient buckets 512 \ + idle_timer 0 + ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203 + + ip nexthop add id 204 via fe80:2::12 dev $rp22 + ip nexthop add id 205 via fe80:3::13 dev $rp23 + ip nexthop add id 206 group 204/205 type resilient buckets 512 \ + idle_timer 0 + ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206 +} + +multipath4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the provided weights. + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + # Restore settings. + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_l4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the provided weights. + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +multipath_test() +{ + # Without an idle timer, weight replacement should happen immediately. + log_info "Running multipath tests without an idle timer" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 0 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 0 + + log_info "Running IPv4 multipath tests" + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 2 1 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 2 1 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 106 group 104,1/105,1 type resilient + + # With an idle timer, weight replacement should not happen, so the + # expected ratio should always be the initial one (1:1). + log_info "Running multipath tests with an idle timer of 120 seconds" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 120 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 120 + + log_info "Running IPv4 multipath tests" + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 1 1 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 1 1 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 1 1 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 1 1 + + ip nexthop replace id 106 group 104,1/105,1 type resilient + + # With a short idle timer and enough idle time, weight replacement + # should happen. + log_info "Running multipath tests with an idle timer of 5 seconds" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 5 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 5 + + log_info "Running IPv4 multipath tests" + sleep 10 + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + sleep 10 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 2 1 + sleep 10 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + sleep 10 + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + sleep 10 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 2 1 + sleep 10 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 106 group 104,1/105,1 type resilient +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp13=${NETIFS[p5]} + rp23=${NETIFS[p6]} + + rp21=${NETIFS[p7]} + h2=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +ip nexthop ls >/dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "Nexthop objects not supported; skipping tests" + exit 0 +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +routing_nh_obj + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 160f9cccdfb7..4f9f17cb45d6 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -35,6 +35,8 @@ ALL_TESTS=" police_shared_test police_rx_mirror_test police_tx_mirror_test + police_pps_rx_test + police_pps_tx_test " NUM_NETIFS=6 source tc_common.sh @@ -290,6 +292,60 @@ police_tx_mirror_test() police_mirror_common_test $rp2 egress "police tx and mirror" } +police_pps_common_test() +{ + local test_name=$1; shift + + RET=0 + + # Rule to measure bandwidth on ingress of $h2 + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action drop + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1000 -c 0 -q & + + local t0=$(tc_rule_stats_get $h2 1 ingress .packets) + sleep 10 + local t1=$(tc_rule_stats_get $h2 1 ingress .packets) + + local er=$((2000)) + local nr=$(packets_rate $t0 $t1 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-10 <= nr_pct && nr_pct <= 10)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%." + + log_test "$test_name" + + { kill %% && wait %%; } 2>/dev/null + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower +} + +police_pps_rx_test() +{ + # Rule to police traffic destined to $h2 on ingress of $rp1 + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok + + police_pps_common_test "police pps on rx" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower +} + +police_pps_tx_test() +{ + # Rule to police traffic destined to $h2 on egress of $rp2 + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok + + police_pps_common_test "police pps on tx" + + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index ce6bea9675c0..eb307ca37bfa 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -657,10 +657,21 @@ test_ecn_decap() { # In accordance with INET_ECN_decapsulate() __test_ecn_decap 00 00 0x00 + __test_ecn_decap 00 01 0x00 + __test_ecn_decap 00 02 0x00 + # 00 03 is tested in test_ecn_decap_error() + __test_ecn_decap 01 00 0x01 __test_ecn_decap 01 01 0x01 - __test_ecn_decap 02 01 0x02 + __test_ecn_decap 01 02 0x01 __test_ecn_decap 01 03 0x03 + __test_ecn_decap 02 00 0x02 + __test_ecn_decap 02 01 0x01 + __test_ecn_decap 02 02 0x02 __test_ecn_decap 02 03 0x03 + __test_ecn_decap 03 00 0x03 + __test_ecn_decap 03 01 0x03 + __test_ecn_decap 03 02 0x03 + __test_ecn_decap 03 03 0x03 test_ecn_decap_error } diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 00bb158b4a5d..f1464f09b080 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ - simult_flows.sh + simult_flows.sh mptcp_sockopt.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 39edce4f541c..2674ba20d524 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -5,8 +5,9 @@ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" ksft_skip=4 test_cnt=1 +timeout_poll=100 +timeout_test=$((timeout_poll * 2 + 1)) ret=0 -pids=() flush_pids() { @@ -14,18 +15,14 @@ flush_pids() # give it some time sleep 1.1 - for pid in ${pids[@]}; do - [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1 - done - pids=() + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null } cleanup() { + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null + ip netns del $ns - for pid in ${pids[@]}; do - [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1 - done } ip -Version > /dev/null 2>&1 @@ -79,39 +76,57 @@ trap cleanup EXIT ip netns add $ns ip -n $ns link set dev lo up -echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null & +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ + 0.0.0.0 >/dev/null & sleep 0.1 -pids[0]=$! chk_msk_nr 0 "no msk on netns creation" -echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null & +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -j -t ${timeout_poll} \ + 127.0.0.1 >/dev/null & sleep 0.1 -pids[1]=$! chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" flush_pids -echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null & -pids[0]=$! +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ + 0.0.0.0 >/dev/null & sleep 0.1 -echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null & -pids[1]=$! +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -j -t ${timeout_poll} \ + 127.0.0.1 >/dev/null & sleep 0.1 chk_msk_fallback_nr 1 "check fallback" flush_pids NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do - echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null & - pids[$((I*2))]=$! + echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -l -w 10 \ + -t ${timeout_poll} 0.0.0.0 >/dev/null & done sleep 0.1 for I in `seq 1 $NR_CLIENTS`; do - echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null & - pids[$((I*2 + 1))]=$! + echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -w 10 \ + -t ${timeout_poll} 127.0.0.1 >/dev/null & done sleep 1.5 diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 77bb62feb872..d88e1fdfb147 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -45,7 +45,14 @@ enum cfg_mode { CFG_MODE_SENDFILE, }; +enum cfg_peek { + CFG_NONE_PEEK, + CFG_WITH_PEEK, + CFG_AFTER_PEEK, +}; + static enum cfg_mode cfg_mode = CFG_MODE_POLL; +static enum cfg_peek cfg_peek = CFG_NONE_PEEK; static const char *cfg_host; static const char *cfg_port = "12000"; static int cfg_sock_proto = IPPROTO_MPTCP; @@ -55,7 +62,9 @@ static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; static bool cfg_remove; +static unsigned int cfg_do_w; static int cfg_wait; +static uint32_t cfg_mark; static void die_usage(void) { @@ -68,8 +77,11 @@ static void die_usage(void) fprintf(stderr, "\t-p num -- use port num\n"); fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); + fprintf(stderr, "\t-M mark -- set socket packet mark\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); + fprintf(stderr, + "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); exit(1); } @@ -139,6 +151,17 @@ static void set_sndbuf(int fd, unsigned int size) } } +static void set_mark(int fd, uint32_t mark) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); + if (err) { + perror("set SO_MARK"); + exit(1); + } +} + static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { @@ -247,6 +270,9 @@ static int sock_connect_mptcp(const char * const remoteaddr, continue; } + if (cfg_mark) + set_mark(sock, cfg_mark); + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -272,8 +298,8 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (cfg_join && first && do_w > 100) do_w = 100; - if (cfg_remove && do_w > 50) - do_w = 50; + if (cfg_remove && do_w > cfg_do_w) + do_w = cfg_do_w; bw = write(fd, buf, do_w); if (bw < 0) @@ -314,6 +340,8 @@ static size_t do_write(const int fd, char *buf, const size_t len) static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) { + int ret = 0; + char tmp[16384]; size_t cap = rand(); cap &= 0xffff; @@ -323,7 +351,17 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) else if (cap > len) cap = len; - return read(fd, buf, cap); + if (cfg_peek == CFG_WITH_PEEK) { + ret = recv(fd, buf, cap, MSG_PEEK); + ret = (ret < 0) ? ret : read(fd, tmp, ret); + } else if (cfg_peek == CFG_AFTER_PEEK) { + ret = recv(fd, buf, cap, MSG_PEEK); + ret = (ret < 0) ? ret : read(fd, buf, cap); + } else { + ret = read(fd, buf, cap); + } + + return ret; } static void set_nonblock(int fd) @@ -802,6 +840,26 @@ int parse_mode(const char *mode) return 0; } +int parse_peek(const char *mode) +{ + if (!strcasecmp(mode, "saveWithPeek")) + return CFG_WITH_PEEK; + if (!strcasecmp(mode, "saveAfterPeek")) + return CFG_AFTER_PEEK; + + fprintf(stderr, "Unknown: %s\n", mode); + fprintf(stderr, "Supported MSG_PEEK mode are:\n"); + fprintf(stderr, + "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n"); + fprintf(stderr, + "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n"); + + die_usage(); + + /* silence compiler warning */ + return 0; +} + static int parse_int(const char *size) { unsigned long s; @@ -829,7 +887,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) { + while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) { switch (c) { case 'j': cfg_join = true; @@ -840,6 +898,9 @@ static void parse_opts(int argc, char **argv) cfg_remove = true; cfg_mode = CFG_MODE_POLL; cfg_wait = 400000; + cfg_do_w = atoi(optarg); + if (cfg_do_w <= 0) + cfg_do_w = 50; break; case 'l': listen_mode = true; @@ -876,6 +937,12 @@ static void parse_opts(int argc, char **argv) case 'w': cfg_wait = atoi(optarg)*1000000; break; + case 'M': + cfg_mark = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_peek = parse_peek(optarg); + break; } } @@ -907,6 +974,8 @@ int main(int argc, char *argv[]) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); + if (cfg_mark) + set_mark(fd, cfg_mark); return main_loop_s(fd); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 10a030b53b23..9236609731b1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -11,7 +11,8 @@ cin="" cout="" ksft_skip=4 capture=false -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) ipv6=true ethtool_random_on=true tc_delay="$((RANDOM%50))" @@ -273,7 +274,7 @@ check_mptcp_disabled() ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 local err=0 - LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ + LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ grep -q "^socket: Protocol not available$" && err=1 ip netns delete ${disabled_ns} @@ -374,7 +375,7 @@ do_transfer() local srv_proto="$4" local connect_addr="$5" local local_addr="$6" - local extra_args="" + local extra_args="$7" local port port=$((10000+$TEST_COUNT)) @@ -393,9 +394,9 @@ do_transfer() fi if [ -n "$extra_args" ] && $options_log; then - options_log=false echo "INFO: extra options: $extra_args" fi + options_log=false :> "$cout" :> "$sout" @@ -425,19 +426,32 @@ do_transfer() sleep 1 fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + if [ ${listener_ns} != ${connector_ns} ]; then + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + fi + local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! wait $cpid @@ -575,6 +589,7 @@ run_tests_lo() local connector_ns="$2" local connect_addr="$3" local loopback="$4" + local extra_args="$5" local lret=0 # skip if test programs are running inside same netns for subsequent runs. @@ -594,7 +609,8 @@ run_tests_lo() local_addr="0.0.0.0" fi - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -608,14 +624,16 @@ run_tests_lo() fi fi - do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP TCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret return 1 fi - do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} TCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -623,7 +641,8 @@ run_tests_lo() fi if [ $do_tcp -gt 1 ] ;then - do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} TCP TCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -639,6 +658,15 @@ run_tests() run_tests_lo $1 $2 $3 0 } +run_tests_peekmode() +{ + local peekmode="$1" + + echo "INFO: with peek mode: ${peekmode}" + run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}" + run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" +} + make_file "$cin" "client" make_file "$sin" "server" @@ -718,6 +746,9 @@ for sender in $ns1 $ns2 $ns3 $ns4;do run_tests "$ns4" $sender dead:beef:3::1 done +run_tests_peekmode "saveWithPeek" +run_tests_peekmode "saveAfterPeek" + time_end=$(date +%s) time_run=$((time_end-time_start)) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 964db9ed544f..fd99485cf2a4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,9 +8,11 @@ cin="" cinsent="" cout="" ksft_skip=4 -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 +do_all_tests=1 TEST_COUNT=0 @@ -76,6 +78,7 @@ cleanup_partial() for netns in "$ns1" "$ns2"; do ip netns del $netns + rm -f /tmp/$netns.{nstat,out} done } @@ -121,12 +124,6 @@ reset_with_add_addr_timeout() -j DROP } -for arg in "$@"; do - if [ "$arg" = "-c" ]; then - capture=1 - fi -done - ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" @@ -237,10 +234,17 @@ do_transfer() sleep 1 fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + if [ $speed = "fast" ]; then mptcp_connect="./mptcp_connect -j" - else - mptcp_connect="./mptcp_connect -r" + elif [ $speed = "slow" ]; then + mptcp_connect="./mptcp_connect -r 50" + elif [ $speed = "least" ]; then + mptcp_connect="./mptcp_connect -r 10" fi local local_addr @@ -250,17 +254,26 @@ do_transfer() local_addr="0.0.0.0" fi - ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \ - -s ${srv_proto} ${local_addr} < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + ${local_addr} < "$sin" > "$sout" & spid=$! sleep 1 if [ "$test_link_fail" -eq 0 ];then - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $connect_addr < "$cin" > "$cout" & else - ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \ - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" & + ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \ + tee "$cinsent" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $connect_addr > "$cout" & fi cpid=$! @@ -284,17 +297,25 @@ do_transfer() let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns1 ] - do - ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + id=${dump[1]} sleep 1 - let counter+=1 - done - else + + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $id + sleep 1 + let counter+=1 + let id+=1 + done + fi + elif [ $rm_nr_ns1 -eq 8 ]; then sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl flush + elif [ $rm_nr_ns1 -eq 9 ]; then + sleep 1 + ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr} fi fi @@ -318,17 +339,31 @@ do_transfer() let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns2 ] - do - ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + id=${dump[1]} sleep 1 - let counter+=1 - done - else + + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $id + sleep 1 + let counter+=1 + let id+=1 + done + fi + elif [ $rm_nr_ns2 -eq 8 ]; then sleep 1 ip netns exec ${connector_ns} ./pm_nl_ctl flush + elif [ $rm_nr_ns2 -eq 9 ]; then + local addr + if is_v6 "${connect_addr}"; then + addr="dead:beef:1::2" + else + addr="10.0.1.2" + fi + sleep 1 + ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr fi fi @@ -354,12 +389,19 @@ do_transfer() kill $cappid fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo " client exit code $retc, server $rets" 1>&2 echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + cat /tmp/${listener_ns}.out echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + cat /tmp/${connector_ns}.out cat "$capout" ret=1 @@ -610,11 +652,22 @@ chk_rm_nr() { local rm_addr_nr=$1 local rm_subflow_nr=$2 + local invert=${3:-""} local count local dump_stats + local addr_ns + local subflow_ns + + if [ -z $invert ]; then + addr_ns=$ns1 + subflow_ns=$ns2 + elif [ $invert = "invert" ]; then + addr_ns=$ns2 + subflow_ns=$ns1 + fi printf "%-39s %s" " " "rm " - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` + count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$rm_addr_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" @@ -625,7 +678,7 @@ chk_rm_nr() fi echo -n " - sf " - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` + count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$rm_subflow_nr" ]; then echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" @@ -724,6 +777,14 @@ subflows_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows, limited by server" 2 2 1 + + # single subflow, dev + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow dev ns2eth3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow, dev" 1 1 1 } signal_address_tests() @@ -767,6 +828,28 @@ signal_address_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 + + # signal addresses + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal addresses" 3 3 3 + chk_add_nr 3 3 + + # signal invalid addresses + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal invalid addresses" 1 1 1 + chk_add_nr 3 3 } link_failure_tests() @@ -802,6 +885,26 @@ add_addr_timeout_tests() run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 chk_add_nr 4 0 + + # signal addresses timeout + reset_with_add_addr_timeout + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 least + chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2 + chk_add_nr 8 0 + + # signal invalid addresses timeout + reset_with_add_addr_timeout + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 least + chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1 + chk_add_nr 8 0 } remove_tests() @@ -833,7 +936,7 @@ remove_tests() run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert # subflow and signal, remove reset @@ -858,6 +961,30 @@ remove_tests() chk_add_nr 1 1 chk_rm_nr 2 2 + # addresses remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + chk_join_nr "remove addresses" 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert + + # invalid addresses remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + chk_join_nr "remove invalid addresses" 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert + # subflows and signal, flush reset ip netns exec $ns1 ./pm_nl_ctl limits 0 3 @@ -869,6 +996,60 @@ remove_tests() chk_join_nr "flush subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 + + # subflows flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush subflows" 3 3 3 + chk_rm_nr 3 3 + + # addresses flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush addresses" 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert + + # invalid addresses flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow + chk_join_nr "flush invalid addresses" 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert + + # remove id 0 subflow + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow + chk_join_nr "remove id 0 subflow" 1 1 1 + chk_rm_nr 1 1 + + # remove id 0 address + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow + chk_join_nr "remove id 0 address" 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert } add_tests() @@ -945,7 +1126,7 @@ ipv6_tests() run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow chk_join_nr "remove single address IPv6" 1 1 1 chk_add_nr 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert # subflow and signal IPv6, remove reset @@ -1088,7 +1269,7 @@ add_addr_ports_tests() run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address with port" 1 1 1 chk_add_nr 1 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert # subflow and signal with port, remove reset @@ -1221,7 +1402,8 @@ usage() echo " -4 v4mapped_tests" echo " -b backup_tests" echo " -p add_addr_ports_tests" - echo " -c syncookies_tests" + echo " -k syncookies_tests" + echo " -c capture pcap files" echo " -h help" } @@ -1235,12 +1417,24 @@ make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT -if [ -z $1 ]; then +for arg in "$@"; do + # check for "capture" arg before launching tests + if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then + capture=1 + fi + + # exception for the capture option, the rest means: a part of the tests + if [ "${arg}" != "-c" ]; then + do_all_tests=0 + fi +done + +if [ $do_all_tests -eq 1 ]; then all_tests exit $ret fi -while getopts 'fsltra64bpch' opt; do +while getopts 'fsltra64bpkch' opt; do case $opt in f) subflows_tests @@ -1272,9 +1466,11 @@ while getopts 'fsltra64bpch' opt; do p) add_addr_ports_tests ;; - c) + k) syncookies_tests ;; + c) + ;; h | *) usage ;; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh new file mode 100755 index 000000000000..2fa13946ac04 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -0,0 +1,276 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ret=0 +sin="" +sout="" +cin="" +cout="" +ksft_skip=4 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) +mptcp_connect="" +do_all_tests=1 + +add_mark_rules() +{ + local ns=$1 + local m=$2 + + for t in iptables ip6tables; do + # just to debug: check we have multiple subflows connection requests + ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT + + # RST packets might be handled by a internal dummy socket + ip netns exec $ns $t -A OUTPUT -p tcp --tcp-flags RST RST -m mark --mark 0 -j ACCEPT + + ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark $m -j ACCEPT + ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark 0 -j DROP + done +} + +init() +{ + rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) + + ns1="ns1-$rndh" + ns2="ns2-$rndh" + + for netns in "$ns1" "$ns2";do + ip netns add $netns || exit $ksft_skip + ip -net $netns link set lo up + ip netns exec $netns sysctl -q net.mptcp.enabled=1 + ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 + ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + done + + for i in `seq 1 4`; do + ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" + ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i + ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad + ip -net "$ns1" link set ns1eth$i up + + ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i + ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad + ip -net "$ns2" link set ns2eth$i up + + # let $ns2 reach any $ns1 address from any interface + ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + + ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal + + ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal + done + + ip netns exec $ns1 ./pm_nl_ctl limits 8 8 + ip netns exec $ns2 ./pm_nl_ctl limits 8 8 + + add_mark_rules $ns1 1 + add_mark_rules $ns2 2 +} + +cleanup() +{ + for netns in "$ns1" "$ns2"; do + ip netns del $netns + done + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +iptables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run all tests without iptables tool" + exit $ksft_skip +fi + +ip6tables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run all tests without ip6tables tool" + exit $ksft_skip +fi + +check_mark() +{ + local ns=$1 + local af=$2 + + tables=iptables + + if [ $af -eq 6 ];then + tables=ip6tables + fi + + counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP) + values=${counters%DROP*} + + for v in $values; do + if [ $v -ne 0 ]; then + echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 + return 1 + fi + done + + return 0 +} + +print_file_err() +{ + ls -l "$1" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "$1" +} + +check_transfer() +{ + in=$1 + out=$2 + what=$3 + + cmp "$in" "$out" > /dev/null 2>&1 + if [ $? -ne 0 ] ;then + echo "[ FAIL ] $what does not match (in, out):" + print_file_err "$in" + print_file_err "$out" + ret=1 + + return 1 + fi + + return 0 +} + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_transfer() +{ + listener_ns="$1" + connector_ns="$2" + cl_proto="$3" + srv_proto="$4" + connect_addr="$5" + + port=12001 + + :> "$cout" + :> "$sout" + + mptcp_connect="./mptcp_connect -r 20" + + local local_addr + if is_v6 "${connect_addr}"; then + local_addr="::" + else + local_addr="0.0.0.0" + fi + + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \ + ${local_addr} < "$sin" > "$sout" & + spid=$! + + sleep 1 + + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \ + $connect_addr < "$cin" > "$cout" & + + cpid=$! + + wait $cpid + retc=$? + wait $spid + rets=$? + + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " client exit code $retc, server $rets" 1>&2 + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + + ret=1 + return 1 + fi + + if [ $local_addr = "::" ];then + check_mark $listener_ns 6 + check_mark $connector_ns 6 + else + check_mark $listener_ns 4 + check_mark $connector_ns 4 + fi + + check_transfer $cin $sout "file received by server" + + rets=$? + + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then + return 0 + fi + + return 1 +} + +make_file() +{ + name=$1 + who=$2 + size=$3 + + dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + + echo "Created $name (size $size KB) containing data sent by $who" +} + +run_tests() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + + lret=$? + + if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +sin=$(mktemp) +sout=$(mktemp) +cin=$(mktemp) +cout=$(mktemp) +init +make_file "$cin" "client" 1 +make_file "$sin" "server" 1 +trap cleanup EXIT + +run_tests $ns1 $ns2 10.0.1.1 +run_tests $ns1 $ns2 dead:beef:1::1 + + +if [ $ret -eq 0 ];then + echo "PASS: all packets had packet mark set" +fi + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index a617e293734c..3c741abe034e 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -100,12 +100,12 @@ done check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" -for i in `seq 9 256`; do +ip netns exec $ns1 ./pm_nl_ctl del 9 +for i in `seq 10 255`; do + ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i ip netns exec $ns1 ./pm_nl_ctl del $i - ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $((i+1)) done check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 -id 2 flags 10.0.0.9 id 3 flags signal,backup 10.0.1.3 id 4 flags signal 10.0.1.4 id 5 flags signal 10.0.1.5 diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 7b4167f3f9a2..115decfdc1ef 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -26,7 +26,7 @@ static void syntax(char *argv[]) { fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]); fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); - fprintf(stderr, "\tdel <id>\n"); + fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n"); fprintf(stderr, "\tflush\n"); @@ -301,6 +301,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int16_t family; int nest_start; u_int8_t id; int off = 0; @@ -310,11 +311,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR, MPTCP_PM_VER); - /* the only argument is the address id */ - if (argc != 3) + /* the only argument is the address id (nonzero) */ + if (argc != 3 && argc != 4) syntax(argv); id = atoi(argv[2]); + /* zero id with the IP address */ + if (!id && argc != 4) + syntax(argv); nest_start = off; nest = (void *)(data + off); @@ -328,6 +332,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(1); memcpy(RTA_DATA(rta), &id, 1); off += NLMSG_ALIGN(rta->rta_len); + + if (!id) { + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", argv[3]); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + } nest->rta_len = off - nest_start; do_nl_req(fd, nh, off, 0); diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f039ee57eb3c..3aeef3bcb101 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -7,7 +7,8 @@ ns2="ns2-$rndh" ns3="ns3-$rndh" capture=false ksft_skip=4 -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) test_cnt=1 ret=0 bail=0 @@ -157,14 +158,20 @@ do_transfer() sleep 1 fi - ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${ns3} \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port \ + 0.0.0.0 < "$sin" > "$sout" & local spid=$! wait_local_port_listen "${ns3}" "${port}" local start start=$(date +%s%3N) - ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${ns1} \ + ./mptcp_connect -jt ${timeout_poll} -p $port \ + 10.0.3.3 < "$cin" > "$cout" & local cpid=$! wait $cpid diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 7b01b7c2ec10..066efd30e294 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -30,25 +30,25 @@ struct reuse_opts { }; struct reuse_opts unreusable_opts[12] = { - {0, 0, 0, 0}, - {0, 0, 0, 1}, - {0, 0, 1, 0}, - {0, 0, 1, 1}, - {0, 1, 0, 0}, - {0, 1, 0, 1}, - {0, 1, 1, 0}, - {0, 1, 1, 1}, - {1, 0, 0, 0}, - {1, 0, 0, 1}, - {1, 0, 1, 0}, - {1, 0, 1, 1}, + {{0, 0}, {0, 0}}, + {{0, 0}, {0, 1}}, + {{0, 0}, {1, 0}}, + {{0, 0}, {1, 1}}, + {{0, 1}, {0, 0}}, + {{0, 1}, {0, 1}}, + {{0, 1}, {1, 0}}, + {{0, 1}, {1, 1}}, + {{1, 0}, {0, 0}}, + {{1, 0}, {0, 1}}, + {{1, 0}, {1, 0}}, + {{1, 0}, {1, 1}}, }; struct reuse_opts reusable_opts[4] = { - {1, 1, 0, 0}, - {1, 1, 0, 1}, - {1, 1, 1, 0}, - {1, 1, 1, 1}, + {{1, 1}, {0, 0}}, + {{1, 1}, {0, 1}}, + {{1, 1}, {1, 0}}, + {{1, 1}, {1, 1}}, }; int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings new file mode 100644 index 000000000000..694d70710ff0 --- /dev/null +++ b/tools/testing/selftests/net/settings @@ -0,0 +1 @@ +timeout=300 diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index b4cca382d125..59067f64b775 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -2,9 +2,12 @@ /* * Test the SO_TXTIME API * - * Takes two streams of { payload, delivery time }[], one input and one output. - * Sends the input stream and verifies arrival matches the output stream. - * The two streams can differ due to out-of-order delivery and drops. + * Takes a stream of { payload, delivery time }[], to be sent across two + * processes. Start this program on two separate network namespaces or + * connected hosts, one instance in transmit mode and the other in receive + * mode using the '-r' option. Receiver will compare arrival timestamps to + * the expected stream. Sender will read transmit timestamps from the error + * queue. The streams can differ due to out-of-order delivery and drops. */ #define _GNU_SOURCE @@ -28,14 +31,17 @@ #include <sys/types.h> #include <time.h> #include <unistd.h> +#include <poll.h> static int cfg_clockid = CLOCK_TAI; -static bool cfg_do_ipv4; -static bool cfg_do_ipv6; static uint16_t cfg_port = 8000; static int cfg_variance_us = 4000; +static uint64_t cfg_start_time_ns; +static int cfg_mark; +static bool cfg_rx; static uint64_t glob_tstart; +static uint64_t tdeliver_max; /* encode one timed transmission (of a 1B payload) */ struct timed_send { @@ -44,18 +50,21 @@ struct timed_send { }; #define MAX_NUM_PKT 8 -static struct timed_send cfg_in[MAX_NUM_PKT]; -static struct timed_send cfg_out[MAX_NUM_PKT]; +static struct timed_send cfg_buf[MAX_NUM_PKT]; static int cfg_num_pkt; static int cfg_errq_level; static int cfg_errq_type; -static uint64_t gettime_ns(void) +static struct sockaddr_storage cfg_dst_addr; +static struct sockaddr_storage cfg_src_addr; +static socklen_t cfg_alen; + +static uint64_t gettime_ns(clockid_t clock) { struct timespec ts; - if (clock_gettime(cfg_clockid, &ts)) + if (clock_gettime(clock, &ts)) error(1, errno, "gettime"); return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec; @@ -75,6 +84,8 @@ static void do_send_one(int fdt, struct timed_send *ts) msg.msg_iov = &iov; msg.msg_iovlen = 1; + msg.msg_name = (struct sockaddr *)&cfg_dst_addr; + msg.msg_namelen = cfg_alen; if (ts->delay_us >= 0) { memset(control, 0, sizeof(control)); @@ -82,6 +93,8 @@ static void do_send_one(int fdt, struct timed_send *ts) msg.msg_controllen = sizeof(control); tdeliver = glob_tstart + ts->delay_us * 1000; + tdeliver_max = tdeliver_max > tdeliver ? + tdeliver_max : tdeliver; cm = CMSG_FIRSTHDR(&msg); cm->cmsg_level = SOL_SOCKET; @@ -98,7 +111,7 @@ static void do_send_one(int fdt, struct timed_send *ts) } -static bool do_recv_one(int fdr, struct timed_send *ts) +static void do_recv_one(int fdr, struct timed_send *ts) { int64_t tstop, texpect; char rbuf[2]; @@ -106,13 +119,13 @@ static bool do_recv_one(int fdr, struct timed_send *ts) ret = recv(fdr, rbuf, sizeof(rbuf), 0); if (ret == -1 && errno == EAGAIN) - return true; + error(1, EAGAIN, "recv: timeout"); if (ret == -1) error(1, errno, "read"); if (ret != 1) error(1, 0, "read: %dB", ret); - tstop = (gettime_ns() - glob_tstart) / 1000; + tstop = (gettime_ns(cfg_clockid) - glob_tstart) / 1000; texpect = ts->delay_us >= 0 ? ts->delay_us : 0; fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n", @@ -123,8 +136,6 @@ static bool do_recv_one(int fdr, struct timed_send *ts) if (llabs(tstop - texpect) > cfg_variance_us) error(1, 0, "exceeds variance (%d us)", cfg_variance_us); - - return false; } static void do_recv_verify_empty(int fdr) @@ -137,18 +148,18 @@ static void do_recv_verify_empty(int fdr) error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno); } -static void do_recv_errqueue_timeout(int fdt) +static int do_recv_errqueue_timeout(int fdt) { char control[CMSG_SPACE(sizeof(struct sock_extended_err)) + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr) + 1]; struct sock_extended_err *err; + int ret, num_tstamp = 0; struct msghdr msg = {0}; struct iovec iov = {0}; struct cmsghdr *cm; int64_t tstamp = 0; - int ret; iov.iov_base = data; iov.iov_len = sizeof(data); @@ -206,9 +217,47 @@ static void do_recv_errqueue_timeout(int fdt) msg.msg_flags = 0; msg.msg_controllen = sizeof(control); + num_tstamp++; } - error(1, 0, "recv: timeout"); + return num_tstamp; +} + +static void recv_errqueue_msgs(int fdt) +{ + struct pollfd pfd = { .fd = fdt, .events = POLLERR }; + const int timeout_ms = 10; + int ret, num_tstamp = 0; + + do { + ret = poll(&pfd, 1, timeout_ms); + if (ret == -1) + error(1, errno, "poll"); + + if (ret && (pfd.revents & POLLERR)) + num_tstamp += do_recv_errqueue_timeout(fdt); + + if (num_tstamp == cfg_num_pkt) + break; + + } while (gettime_ns(cfg_clockid) < tdeliver_max); +} + +static void start_time_wait(void) +{ + uint64_t now; + int err; + + if (!cfg_start_time_ns) + return; + + now = gettime_ns(CLOCK_REALTIME); + if (cfg_start_time_ns < now) + return; + + err = usleep((cfg_start_time_ns - now) / 1000); + if (err) + error(1, errno, "usleep"); } static void setsockopt_txtime(int fd) @@ -245,6 +294,10 @@ static int setup_tx(struct sockaddr *addr, socklen_t alen) setsockopt_txtime(fd); + if (cfg_mark && + setsockopt(fd, SOL_SOCKET, SO_MARK, &cfg_mark, sizeof(cfg_mark))) + error(1, errno, "setsockopt mark"); + return fd; } @@ -266,31 +319,70 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen) return fd; } -static void do_test(struct sockaddr *addr, socklen_t alen) +static void do_test_tx(struct sockaddr *addr, socklen_t alen) { - int fdt, fdr, i; + int fdt, i; fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n", addr->sa_family == PF_INET ? '4' : '6', cfg_clockid == CLOCK_TAI ? "tai" : "monotonic"); fdt = setup_tx(addr, alen); - fdr = setup_rx(addr, alen); - glob_tstart = gettime_ns(); + start_time_wait(); + glob_tstart = gettime_ns(cfg_clockid); for (i = 0; i < cfg_num_pkt; i++) - do_send_one(fdt, &cfg_in[i]); + do_send_one(fdt, &cfg_buf[i]); + + recv_errqueue_msgs(fdt); + + if (close(fdt)) + error(1, errno, "close t"); +} + +static void do_test_rx(struct sockaddr *addr, socklen_t alen) +{ + int fdr, i; + + fdr = setup_rx(addr, alen); + + start_time_wait(); + glob_tstart = gettime_ns(cfg_clockid); + for (i = 0; i < cfg_num_pkt; i++) - if (do_recv_one(fdr, &cfg_out[i])) - do_recv_errqueue_timeout(fdt); + do_recv_one(fdr, &cfg_buf[i]); do_recv_verify_empty(fdr); if (close(fdr)) error(1, errno, "close r"); - if (close(fdt)) - error(1, errno, "close t"); +} + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + } } static int parse_io(const char *optarg, struct timed_send *array) @@ -323,17 +415,46 @@ static int parse_io(const char *optarg, struct timed_send *array) return aoff / 2; } +static void usage(const char *progname) +{ + fprintf(stderr, "\nUsage: %s [options] <payload>\n" + "Options:\n" + " -4 only IPv4\n" + " -6 only IPv6\n" + " -c <clock> monotonic (default) or tai\n" + " -D <addr> destination IP address (server)\n" + " -S <addr> source IP address (client)\n" + " -r run rx mode\n" + " -t <nsec> start time (UTC nanoseconds)\n" + " -m <mark> socket mark\n" + "\n", + progname); + exit(1); +} + static void parse_opts(int argc, char **argv) { - int c, ilen, olen; + char *daddr = NULL, *saddr = NULL; + int domain = PF_UNSPEC; + int c; - while ((c = getopt(argc, argv, "46c:")) != -1) { + while ((c = getopt(argc, argv, "46c:S:D:rt:m:")) != -1) { switch (c) { case '4': - cfg_do_ipv4 = true; + if (domain != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + domain = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + cfg_errq_level = SOL_IP; + cfg_errq_type = IP_RECVERR; break; case '6': - cfg_do_ipv6 = true; + if (domain != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + domain = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + cfg_errq_level = SOL_IPV6; + cfg_errq_type = IPV6_RECVERR; break; case 'c': if (!strcmp(optarg, "tai")) @@ -344,50 +465,50 @@ static void parse_opts(int argc, char **argv) else error(1, 0, "unknown clock id %s", optarg); break; + case 'S': + saddr = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'r': + cfg_rx = true; + break; + case 't': + cfg_start_time_ns = strtol(optarg, NULL, 0); + break; + case 'm': + cfg_mark = strtol(optarg, NULL, 0); + break; default: - error(1, 0, "parse error at %d", optind); + usage(argv[0]); } } - if (argc - optind != 2) - error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]); + if (argc - optind != 1) + usage(argv[0]); + + if (domain == PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + if (!daddr) + error(1, 0, "-D <server addr> required\n"); + if (!cfg_rx && !saddr) + error(1, 0, "-S <client addr> required\n"); - ilen = parse_io(argv[optind], cfg_in); - olen = parse_io(argv[optind + 1], cfg_out); - if (ilen != olen) - error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen); - cfg_num_pkt = ilen; + setup_sockaddr(domain, daddr, &cfg_dst_addr); + setup_sockaddr(domain, saddr, &cfg_src_addr); + + cfg_num_pkt = parse_io(argv[optind], cfg_buf); } int main(int argc, char **argv) { parse_opts(argc, argv); - if (cfg_do_ipv6) { - struct sockaddr_in6 addr6 = {0}; - - addr6.sin6_family = AF_INET6; - addr6.sin6_port = htons(cfg_port); - addr6.sin6_addr = in6addr_loopback; - - cfg_errq_level = SOL_IPV6; - cfg_errq_type = IPV6_RECVERR; - - do_test((void *)&addr6, sizeof(addr6)); - } - - if (cfg_do_ipv4) { - struct sockaddr_in addr4 = {0}; - - addr4.sin_family = AF_INET; - addr4.sin_port = htons(cfg_port); - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - - cfg_errq_level = SOL_IP; - cfg_errq_type = IP_RECVERR; - - do_test((void *)&addr4, sizeof(addr4)); - } + if (cfg_rx) + do_test_rx((void *)&cfg_dst_addr, cfg_alen); + else + do_test_tx((void *)&cfg_src_addr, cfg_alen); return 0; } diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh index 3f7800eaecb1..3f06f4d286a9 100755 --- a/tools/testing/selftests/net/so_txtime.sh +++ b/tools/testing/selftests/net/so_txtime.sh @@ -3,32 +3,85 @@ # # Regression tests for the SO_TXTIME interface -# Run in network namespace -if [[ $# -eq 0 ]]; then - if ! ./in_netns.sh $0 __subprocess; then - # test is time sensitive, can be flaky - echo "test failed: retry once" - ./in_netns.sh $0 __subprocess +set -e + +readonly DEV="veth0" +readonly BIN="./so_txtime" + +readonly RAND="$(mktemp -u XXXXXX)" +readonly NSPREFIX="ns-${RAND}" +readonly NS1="${NSPREFIX}1" +readonly NS2="${NSPREFIX}2" + +readonly SADDR4='192.168.1.1' +readonly DADDR4='192.168.1.2' +readonly SADDR6='fd::1' +readonly DADDR6='fd::2' + +cleanup() { + ip netns del "${NS2}" + ip netns del "${NS1}" +} + +trap cleanup EXIT + +# Create virtual ethernet pair between network namespaces +ip netns add "${NS1}" +ip netns add "${NS2}" + +ip link add "${DEV}" netns "${NS1}" type veth \ + peer name "${DEV}" netns "${NS2}" + +# Bring the devices up +ip -netns "${NS1}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DEV}" up + +# Set fixed MAC addresses on the devices +ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 +ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 + +# Add fixed IP addresses to the devices +ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" +ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" +ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad +ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad + +do_test() { + local readonly IP="$1" + local readonly CLOCK="$2" + local readonly TXARGS="$3" + local readonly RXARGS="$4" + + if [[ "${IP}" == "4" ]]; then + local readonly SADDR="${SADDR4}" + local readonly DADDR="${DADDR4}" + elif [[ "${IP}" == "6" ]]; then + local readonly SADDR="${SADDR6}" + local readonly DADDR="${DADDR6}" + else + echo "Invalid IP version ${IP}" + exit 1 fi - exit $? -fi + local readonly START="$(date +%s%N --date="+ 0.1 seconds")" + ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r & + ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}" + wait "$!" +} -set -e +ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq +do_test 4 mono a,-1 a,-1 +do_test 6 mono a,0 a,0 +do_test 6 mono a,10 a,10 +do_test 4 mono a,10,b,20 a,10,b,20 +do_test 6 mono a,20,b,10 b,20,a,20 -tc qdisc add dev lo root fq -./so_txtime -4 -6 -c mono a,-1 a,-1 -./so_txtime -4 -6 -c mono a,0 a,0 -./so_txtime -4 -6 -c mono a,10 a,10 -./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20 -./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20 - -if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then - ! ./so_txtime -4 -6 -c tai a,-1 a,-1 - ! ./so_txtime -4 -6 -c tai a,0 a,0 - ./so_txtime -4 -6 -c tai a,10 a,10 - ./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20 - ./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20 +if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then + ! do_test 4 tai a,-1 a,-1 + ! do_test 6 tai a,0 a,0 + do_test 6 tai a,10 a,10 + do_test 4 tai a,10,b,20 a,10,b,20 + do_test 6 tai a,20,b,10 b,10,a,20 else echo "tc ($(tc -V)) does not support qdisc etf. skipping" fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh new file mode 100755 index 000000000000..a8fa64136282 --- /dev/null +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -0,0 +1,251 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly BASE="ns-$(mktemp -u XXXXXX)" +readonly SRC=2 +readonly DST=1 +readonly DST_NAT=100 +readonly NS_SRC=$BASE$SRC +readonly NS_DST=$BASE$DST + +# "baremetal" network used for raw UDP traffic +readonly BM_NET_V4=192.168.1. +readonly BM_NET_V6=2001:db8:: + +# "overlay" network used for UDP over UDP tunnel traffic +readonly OL_NET_V4=172.16.1. +readonly OL_NET_V6=2001:db8:1:: +readonly NPROCS=`nproc` + +cleanup() { + local ns + local -r jobs="$(jobs -p)" + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + + for ns in $NS_SRC $NS_DST; do + ip netns del $ns 2>/dev/null + done +} + +trap cleanup EXIT + +create_ns() { + local net + local ns + + for ns in $NS_SRC $NS_DST; do + ip netns add $ns + ip -n $ns link set dev lo up + done + + ip link add name veth$SRC type veth peer name veth$DST + + for ns in $SRC $DST; do + ip link set dev veth$ns netns $BASE$ns + ip -n $BASE$ns link set dev veth$ns up + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad + done + ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null +} + +create_vxlan_endpoint() { + local -r netns=$1 + local -r bm_dev=$2 + local -r bm_rem_addr=$3 + local -r vxlan_dev=$4 + local -r vxlan_id=$5 + local -r vxlan_port=4789 + + ip -n $netns link set dev $bm_dev up + ip -n $netns link add dev $vxlan_dev type vxlan id $vxlan_id \ + dstport $vxlan_port remote $bm_rem_addr + ip -n $netns link set dev $vxlan_dev up +} + +create_vxlan_pair() { + local ns + + create_ns + + for ns in $SRC $DST; do + # note that 3 - $SRC == $DST and 3 - $DST == $SRC + create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V4$((3 - $ns)) vxlan$ns 4 + ip -n $BASE$ns addr add dev vxlan$ns $OL_NET_V4$ns/24 + done + for ns in $SRC $DST; do + create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6 + ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad + done +} + +is_ipv6() { + if [[ $1 =~ .*:.* ]]; then + return 0 + fi + return 1 +} + +run_test() { + local -r msg=$1 + local -r dst=$2 + local -r pkts=$3 + local -r vxpkts=$4 + local bind=$5 + local rx_args="" + local rx_family="-4" + local family=-4 + local filter=IpInReceives + local ipt=iptables + + printf "%-40s" "$msg" + + if is_ipv6 $dst; then + # rx program does not support '-6' and implies ipv6 usage by default + rx_family="" + family=-6 + filter=Ip6InReceives + ipt=ip6tables + fi + + rx_args="$rx_family" + [ -n "$bind" ] && rx_args="$rx_args -b $bind" + + # send a single GSO packet, segmented in 10 UDP frames. + # Always expect 10 UDP frames on RX side as rx socket does + # not enable GRO + ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789 + ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 + ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & + local spid=$! + sleep 0.1 + ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst + local retc=$? + wait $spid + local rets=$? + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi + + local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \ + sed -e 's/\[//' -e 's/:.*//'` + if [ $rcv != $pkts ]; then + echo " fail - received $rvs packets, expected $pkts" + ret=1 + return + fi + + local vxrcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 4789' | \ + sed -e 's/\[//' -e 's/:.*//'` + + # upper net can generate a little noise, allow some tolerance + if [ $vxrcv -lt $vxpkts -o $vxrcv -gt $((vxpkts + 3)) ]; then + echo " fail - received $vxrcv vxlan packets, expected $vxpkts" + ret=1 + return + fi + echo " ok" +} + +run_bench() { + local -r msg=$1 + local -r dst=$2 + local family=-4 + + printf "%-40s" "$msg" + if [ $NPROCS -lt 2 ]; then + echo " skip - needed 2 CPUs found $NPROCS" + return + fi + + is_ipv6 $dst && family=-6 + + # bind the sender and the receiver to different CPUs to try + # get reproducible results + ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" + ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & + local spid=$! + sleep 0.1 + ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst + local retc=$? + wait $spid + local rets=$? + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi +} + +for family in 4 6; do + BM_NET=$BM_NET_V4 + OL_NET=$OL_NET_V4 + IPT=iptables + SUFFIX=24 + VXDEV=vxlan + + if [ $family = 6 ]; then + BM_NET=$BM_NET_V6 + OL_NET=$OL_NET_V6 + SUFFIX="64 nodad" + VXDEV=vxlan6 + IPT=ip6tables + fi + + echo "IPv$family" + + create_ns + run_test "No GRO" $BM_NET$DST 10 0 + cleanup + + create_ns + ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on + run_test "GRO frag list" $BM_NET$DST 1 0 + cleanup + + # UDP GRO fwd skips aggregation when find an udp socket with the GRO option + # if there is an UDP tunnel in the running system, such lookup happen + # take place. + # use NAT to circumvent GRO FWD check + create_ns + ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \ + -j DNAT --to-destination $BM_NET$DST + run_test "GRO fwd" $BM_NET$DST_NAT 1 0 $BM_NET$DST + cleanup + + create_ns + run_bench "UDP fwd perf" $BM_NET$DST + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + run_bench "UDP GRO fwd perf" $BM_NET$DST + cleanup + + create_vxlan_pair + ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + cleanup + + # use NAT to circumvent GRO FWD check + create_vxlan_pair + ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \ + -j DNAT --to-destination $OL_NET$DST + + # load arp cache before running the test to reduce the amount of + # stray traffic on top of the UDP tunnel + ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST + cleanup + + create_vxlan_pair + run_bench "UDP tunnel fwd perf" $OL_NET$DST + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + cleanup +done + +exit $ret diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh new file mode 100755 index 000000000000..2fedc0781ce8 --- /dev/null +++ b/tools/testing/selftests/net/veth.sh @@ -0,0 +1,177 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" +readonly BASE=`basename $STATS` +readonly SRC=2 +readonly DST=1 +readonly DST_NAT=100 +readonly NS_SRC=$BASE$SRC +readonly NS_DST=$BASE$DST + +# "baremetal" network used for raw UDP traffic +readonly BM_NET_V4=192.168.1. +readonly BM_NET_V6=2001:db8:: + +readonly NPROCS=`nproc` +ret=0 + +cleanup() { + local ns + local -r jobs="$(jobs -p)" + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + rm -f $STATS + + for ns in $NS_SRC $NS_DST; do + ip netns del $ns 2>/dev/null + done +} + +trap cleanup EXIT + +create_ns() { + local ns + + for ns in $NS_SRC $NS_DST; do + ip netns add $ns + ip -n $ns link set dev lo up + done + + ip link add name veth$SRC type veth peer name veth$DST + + for ns in $SRC $DST; do + ip link set dev veth$ns netns $BASE$ns up + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad + done + echo "#kernel" > $BASE + chmod go-rw $BASE +} + +__chk_flag() { + local msg="$1" + local target=$2 + local expected=$3 + local flagname=$4 + + local flag=`ip netns exec $BASE$target ethtool -k veth$target |\ + grep $flagname | awk '{print $2}'` + + printf "%-60s" "$msg" + if [ "$flag" = "$expected" ]; then + echo " ok " + else + echo " fail - expected $expected found $flag" + ret=1 + fi +} + +chk_gro_flag() { + __chk_flag "$1" $2 $3 generic-receive-offload +} + +chk_tso_flag() { + __chk_flag "$1" $2 $3 tcp-segmentation-offload +} + +chk_gro() { + local msg="$1" + local expected=$2 + + ip netns exec $BASE$SRC ping -qc 1 $BM_NET_V4$DST >/dev/null + NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat -n + + printf "%-60s" "$msg" + ip netns exec $BASE$DST ./udpgso_bench_rx -C 1000 -R 10 & + local spid=$! + sleep 0.1 + + ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 13000 -S 1300 -M 1 -D $BM_NET_V4$DST + local retc=$? + wait $spid + local rets=$? + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi + + local pkts=`NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat IpInReceives | \ + awk '{print $2}' | tail -n 1` + if [ "$pkts" = "$expected" ]; then + echo " ok " + else + echo " fail - got $pkts packets, expected $expected " + ret=1 + fi +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. Build bpf selftest first" + exit -1 +fi + +create_ns +chk_gro_flag "default - gro flag" $SRC off +chk_gro_flag " - peer gro flag" $DST off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +chk_gro " - aggregation" 1 +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +chk_gro " - aggregation with TSO off" 10 +cleanup + +create_ns +ip netns exec $NS_DST ethtool -K veth$DST gro on +chk_gro_flag "with gro on - gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - aggregation with TSO off" 1 +cleanup + +create_ns +ip -n $NS_DST link set dev veth$DST down +ip netns exec $NS_DST ethtool -K veth$DST gro on +chk_gro_flag "with gro enabled on link down - gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +ip -n $NS_DST link set dev veth$DST up +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - aggregation with TSO off" 1 +cleanup + +create_ns +ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null +chk_gro_flag "with xdp attached - gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC off +chk_tso_flag " - peer tso flag" $DST on +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - aggregation" 1 + + +ip -n $NS_DST link set dev veth$DST down +ip -n $NS_SRC link set dev veth$SRC down +chk_gro_flag " - after dev off, flag" $DST on +chk_gro_flag " - peer flag" $SRC off + +ip netns exec $NS_DST ethtool -K veth$DST gro on +ip -n $NS_DST link set dev veth$DST xdp off +chk_gro_flag " - after gro on xdp off, gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +ip -n $NS_DST link set dev veth$DST up +ip -n $NS_SRC link set dev veth$SRC up +chk_gro " - aggregation" 1 + +ip netns exec $NS_DST ethtool -K veth$DST gro off +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +chk_gro "aggregation again with default and TSO off" 10 + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 431296c0f91c..427d94816f2d 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -371,6 +371,88 @@ else ip netns exec nsr1 nft list ruleset fi +# Another test: +# Add bridge interface br0 to Router1, with NAT enabled. +ip -net nsr1 link add name br0 type bridge +ip -net nsr1 addr flush dev veth0 +ip -net nsr1 link set up dev veth0 +ip -net nsr1 link set veth0 master br0 +ip -net nsr1 addr add 10.0.1.1/24 dev br0 +ip -net nsr1 addr add dead:1::1/64 dev br0 +ip -net nsr1 link set up dev br0 + +ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null + +# br0 with NAT enabled. +ip netns exec nsr1 nft -f - <<EOF +flush table ip nat +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345 + } + + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oifname "veth1" counter masquerade + } +} +EOF + +if test_tcp_forwarding_nat ns1 ns2; then + echo "PASS: flow offloaded for ns1/ns2 with bridge NAT" +else + echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 + ip netns exec nsr1 nft list ruleset + ret=1 +fi + +# Another test: +# Add bridge interface br0 to Router1, with NAT and VLAN. +ip -net nsr1 link set veth0 nomaster +ip -net nsr1 link set down dev veth0 +ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10 +ip -net nsr1 link set up dev veth0 +ip -net nsr1 link set up dev veth0.10 +ip -net nsr1 link set veth0.10 master br0 + +ip -net ns1 addr flush dev eth0 +ip -net ns1 link add link eth0 name eth0.10 type vlan id 10 +ip -net ns1 link set eth0 up +ip -net ns1 link set eth0.10 up +ip -net ns1 addr add 10.0.1.99/24 dev eth0.10 +ip -net ns1 route add default via 10.0.1.1 +ip -net ns1 addr add dead:1::99/64 dev eth0.10 + +if test_tcp_forwarding_nat ns1 ns2; then + echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN" +else + echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 + ip netns exec nsr1 nft list ruleset + ret=1 +fi + +# restore test topology (remove bridge and VLAN) +ip -net nsr1 link set veth0 nomaster +ip -net nsr1 link set veth0 down +ip -net nsr1 link set veth0.10 down +ip -net nsr1 link delete veth0.10 type vlan +ip -net nsr1 link delete br0 type bridge +ip -net ns1 addr flush dev eth0.10 +ip -net ns1 link set eth0.10 down +ip -net ns1 link set eth0 down +ip -net ns1 link delete eth0.10 type vlan + +# restore address in ns1 and nsr1 +ip -net ns1 link set eth0 up +ip -net ns1 addr add 10.0.1.99/24 dev eth0 +ip -net ns1 route add default via 10.0.1.1 +ip -net ns1 addr add dead:1::99/64 dev eth0 +ip -net ns1 route add default via dead:1::1 +ip -net nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net nsr1 addr add dead:1::1/64 dev veth0 +ip -net nsr1 link set up dev veth0 + KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) SPI1=$RANDOM diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore new file mode 100644 index 000000000000..790c47001e77 --- /dev/null +++ b/tools/testing/selftests/perf_events/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +sigtrap_threads +remove_on_exec diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile new file mode 100644 index 000000000000..fcafa5f0d34c --- /dev/null +++ b/tools/testing/selftests/perf_events/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include +LDFLAGS += -lpthread + +TEST_GEN_PROGS := sigtrap_threads remove_on_exec +include ../lib.mk diff --git a/tools/testing/selftests/perf_events/config b/tools/testing/selftests/perf_events/config new file mode 100644 index 000000000000..ba58ff2203e4 --- /dev/null +++ b/tools/testing/selftests/perf_events/config @@ -0,0 +1 @@ +CONFIG_PERF_EVENTS=y diff --git a/tools/testing/selftests/perf_events/remove_on_exec.c b/tools/testing/selftests/perf_events/remove_on_exec.c new file mode 100644 index 000000000000..5814611a1dc7 --- /dev/null +++ b/tools/testing/selftests/perf_events/remove_on_exec.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for remove_on_exec. + * + * Copyright (C) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +/* We need the latest siginfo from the kernel repo. */ +#include <sys/types.h> +#include <asm/siginfo.h> +#define __have_siginfo_t 1 +#define __have_sigval_t 1 +#define __have_sigevent_t 1 +#define __siginfo_t_defined +#define __sigval_t_defined +#define __sigevent_t_defined +#define _BITS_SIGINFO_CONSTS_H 1 +#define _BITS_SIGEVENT_CONSTS_H 1 + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <linux/perf_event.h> +#include <pthread.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +static volatile int signal_count; + +static struct perf_event_attr make_event_attr(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .size = sizeof(attr), + .config = PERF_COUNT_HW_INSTRUCTIONS, + .sample_period = 1000, + .exclude_kernel = 1, + .exclude_hv = 1, + .disabled = 1, + .inherit = 1, + /* + * Children normally retain their inherited event on exec; with + * remove_on_exec, we'll remove their event, but the parent and + * any other non-exec'd children will keep their events. + */ + .remove_on_exec = 1, + .sigtrap = 1, + }; + return attr; +} + +static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext) +{ + if (info->si_code != TRAP_PERF) { + fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code); + return; + } + + signal_count++; +} + +FIXTURE(remove_on_exec) +{ + struct sigaction oldact; + int fd; +}; + +FIXTURE_SETUP(remove_on_exec) +{ + struct perf_event_attr attr = make_event_attr(); + struct sigaction action = {}; + + signal_count = 0; + + /* Initialize sigtrap handler. */ + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0); + + /* Initialize perf event. */ + self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); + ASSERT_NE(self->fd, -1); +} + +FIXTURE_TEARDOWN(remove_on_exec) +{ + close(self->fd); + sigaction(SIGTRAP, &self->oldact, NULL); +} + +/* Verify event propagates to fork'd child. */ +TEST_F(remove_on_exec, fork_only) +{ + int status; + pid_t pid = fork(); + + if (pid == 0) { + ASSERT_EQ(signal_count, 0); + ASSERT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + while (!signal_count); + _exit(42); + } + + while (!signal_count); /* Child enables event. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(WEXITSTATUS(status), 42); +} + +/* + * Verify that event does _not_ propagate to fork+exec'd child; event enabled + * after fork+exec. + */ +TEST_F(remove_on_exec, fork_exec_then_enable) +{ + pid_t pid_exec, pid_only_fork; + int pipefd[2]; + int tmp; + + /* + * Non-exec child, to ensure exec does not affect inherited events of + * other children. + */ + pid_only_fork = fork(); + if (pid_only_fork == 0) { + /* Block until parent enables event. */ + while (!signal_count); + _exit(42); + } + + ASSERT_NE(pipe(pipefd), -1); + pid_exec = fork(); + if (pid_exec == 0) { + ASSERT_NE(dup2(pipefd[1], STDOUT_FILENO), -1); + close(pipefd[0]); + execl("/proc/self/exe", "exec_child", NULL); + _exit((perror("exec failed"), 1)); + } + close(pipefd[1]); + + ASSERT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Child is running. */ + /* Wait for exec'd child to start spinning. */ + EXPECT_EQ(read(pipefd[0], &tmp, sizeof(int)), sizeof(int)); + EXPECT_EQ(tmp, 42); + close(pipefd[0]); + /* Now we can enable the event, knowing the child is doing work. */ + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + /* If the event propagated to the exec'd child, it will exit normally... */ + usleep(100000); /* ... give time for event to trigger (in case of bug). */ + EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */ + EXPECT_EQ(kill(pid_exec, SIGKILL), 0); + + /* Verify removal from child did not affect this task's event. */ + tmp = signal_count; + while (signal_count == tmp); /* Should not hang! */ + /* Nor should it have affected the first child. */ + EXPECT_EQ(waitpid(pid_only_fork, &tmp, 0), pid_only_fork); + EXPECT_EQ(WEXITSTATUS(tmp), 42); +} + +/* + * Verify that event does _not_ propagate to fork+exec'd child; event enabled + * before fork+exec. + */ +TEST_F(remove_on_exec, enable_then_fork_exec) +{ + pid_t pid_exec; + int tmp; + + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + + pid_exec = fork(); + if (pid_exec == 0) { + execl("/proc/self/exe", "exec_child", NULL); + _exit((perror("exec failed"), 1)); + } + + /* + * The child may exit abnormally at any time if the event propagated and + * a SIGTRAP is sent before the handler was set up. + */ + usleep(100000); /* ... give time for event to trigger (in case of bug). */ + EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */ + EXPECT_EQ(kill(pid_exec, SIGKILL), 0); + + /* Verify removal from child did not affect this task's event. */ + tmp = signal_count; + while (signal_count == tmp); /* Should not hang! */ +} + +TEST_F(remove_on_exec, exec_stress) +{ + pid_t pids[30]; + int i, tmp; + + for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) { + pids[i] = fork(); + if (pids[i] == 0) { + execl("/proc/self/exe", "exec_child", NULL); + _exit((perror("exec failed"), 1)); + } + + /* Some forked with event disabled, rest with enabled. */ + if (i > 10) + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + } + + usleep(100000); /* ... give time for event to trigger (in case of bug). */ + + for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) { + /* All children should still be running. */ + EXPECT_EQ(waitpid(pids[i], &tmp, WNOHANG), 0); + EXPECT_EQ(kill(pids[i], SIGKILL), 0); + } + + /* Verify event is still alive. */ + tmp = signal_count; + while (signal_count == tmp); +} + +/* For exec'd child. */ +static void exec_child(void) +{ + struct sigaction action = {}; + const int val = 42; + + /* Set up sigtrap handler in case we erroneously receive a trap. */ + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + if (sigaction(SIGTRAP, &action, NULL)) + _exit((perror("sigaction failed"), 1)); + + /* Signal parent that we're starting to spin. */ + if (write(STDOUT_FILENO, &val, sizeof(int)) == -1) + _exit((perror("write failed"), 1)); + + /* Should hang here until killed. */ + while (!signal_count); +} + +#define main test_main +TEST_HARNESS_MAIN +#undef main +int main(int argc, char *argv[]) +{ + if (!strcmp(argv[0], "exec_child")) { + exec_child(); + return 1; + } + + return test_main(argc, argv); +} diff --git a/tools/testing/selftests/perf_events/settings b/tools/testing/selftests/perf_events/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/perf_events/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c new file mode 100644 index 000000000000..78ddf5e11625 --- /dev/null +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for perf events with SIGTRAP across all threads. + * + * Copyright (C) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +/* We need the latest siginfo from the kernel repo. */ +#include <sys/types.h> +#include <asm/siginfo.h> +#define __have_siginfo_t 1 +#define __have_sigval_t 1 +#define __have_sigevent_t 1 +#define __siginfo_t_defined +#define __sigval_t_defined +#define __sigevent_t_defined +#define _BITS_SIGINFO_CONSTS_H 1 +#define _BITS_SIGEVENT_CONSTS_H 1 + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <pthread.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#define NUM_THREADS 5 + +/* Data shared between test body, threads, and signal handler. */ +static struct { + int tids_want_signal; /* Which threads still want a signal. */ + int signal_count; /* Sanity check number of signals received. */ + volatile int iterate_on; /* Variable to set breakpoint on. */ + siginfo_t first_siginfo; /* First observed siginfo_t. */ +} ctx; + +/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */ +#define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) + +static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_BREAKPOINT, + .size = sizeof(attr), + .sample_period = 1, + .disabled = !enabled, + .bp_addr = (unsigned long)addr, + .bp_type = HW_BREAKPOINT_RW, + .bp_len = HW_BREAKPOINT_LEN_1, + .inherit = 1, /* Children inherit events ... */ + .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */ + .remove_on_exec = 1, /* Required by sigtrap. */ + .sigtrap = 1, /* Request synchronous SIGTRAP on event. */ + .sig_data = TEST_SIG_DATA(addr), + }; + return attr; +} + +static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext) +{ + if (info->si_code != TRAP_PERF) { + fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code); + return; + } + + /* + * The data in siginfo_t we're interested in should all be the same + * across threads. + */ + if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED)) + ctx.first_siginfo = *info; + __atomic_fetch_sub(&ctx.tids_want_signal, syscall(__NR_gettid), __ATOMIC_RELAXED); +} + +static void *test_thread(void *arg) +{ + pthread_barrier_t *barrier = (pthread_barrier_t *)arg; + pid_t tid = syscall(__NR_gettid); + int iter; + int i; + + pthread_barrier_wait(barrier); + + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + iter = ctx.iterate_on; /* read */ + for (i = 0; i < iter - 1; i++) { + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + ctx.iterate_on = iter; /* idempotent write */ + } + + return NULL; +} + +FIXTURE(sigtrap_threads) +{ + struct sigaction oldact; + pthread_t threads[NUM_THREADS]; + pthread_barrier_t barrier; + int fd; +}; + +FIXTURE_SETUP(sigtrap_threads) +{ + struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on); + struct sigaction action = {}; + int i; + + memset(&ctx, 0, sizeof(ctx)); + + /* Initialize sigtrap handler. */ + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0); + + /* Initialize perf event. */ + self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); + ASSERT_NE(self->fd, -1); + + /* Spawn threads inheriting perf event. */ + pthread_barrier_init(&self->barrier, NULL, NUM_THREADS + 1); + for (i = 0; i < NUM_THREADS; i++) + ASSERT_EQ(pthread_create(&self->threads[i], NULL, test_thread, &self->barrier), 0); +} + +FIXTURE_TEARDOWN(sigtrap_threads) +{ + pthread_barrier_destroy(&self->barrier); + close(self->fd); + sigaction(SIGTRAP, &self->oldact, NULL); +} + +static void run_test_threads(struct __test_metadata *_metadata, + FIXTURE_DATA(sigtrap_threads) *self) +{ + int i; + + pthread_barrier_wait(&self->barrier); + for (i = 0; i < NUM_THREADS; i++) + ASSERT_EQ(pthread_join(self->threads[i], NULL), 0); +} + +TEST_F(sigtrap_threads, remain_disabled) +{ + run_test_threads(_metadata, self); + EXPECT_EQ(ctx.signal_count, 0); + EXPECT_NE(ctx.tids_want_signal, 0); +} + +TEST_F(sigtrap_threads, enable_event) +{ + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + run_test_threads(_metadata, self); + + EXPECT_EQ(ctx.signal_count, NUM_THREADS); + EXPECT_EQ(ctx.tids_want_signal, 0); + EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); + EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + + /* Check enabled for parent. */ + ctx.iterate_on = 0; + EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1); +} + +/* Test that modification propagates to all inherited events. */ +TEST_F(sigtrap_threads, modify_and_enable_event) +{ + struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on); + + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0); + run_test_threads(_metadata, self); + + EXPECT_EQ(ctx.signal_count, NUM_THREADS); + EXPECT_EQ(ctx.tids_want_signal, 0); + EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); + EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + + /* Check enabled for parent. */ + ctx.iterate_on = 0; + EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1); +} + +/* Stress test event + signal handling. */ +TEST_F(sigtrap_threads, signal_stress) +{ + ctx.iterate_on = 3000; + + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0); + run_test_threads(_metadata, self); + EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_DISABLE, 0), 0); + + EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on); + EXPECT_EQ(ctx.tids_want_signal, 0); + EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); + EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index c25cf7cd45e9..33ee34fc0828 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -10,16 +10,7 @@ * * We create two sets of source and destination buffers, one in regular memory, * the other cache-inhibited (by default we use /dev/fb0 for this, but an - * alterative path for cache-inhibited memory may be provided). - * - * One way to get cache-inhibited memory is to use the "mem" kernel parameter - * to limit the kernel to less memory than actually exists. Addresses above - * the limit may still be accessed but will be treated as cache-inhibited. For - * example, if there is actually 4GB of memory and the parameter "mem=3GB" is - * used, memory from address 0xC0000000 onwards is treated as cache-inhibited. - * To access this region /dev/mem is used. The kernel should be configured - * without CONFIG_STRICT_DEVMEM. In this case use: - * ./alignment_handler /dev/mem 0xc0000000 + * alterative path for cache-inhibited memory may be provided, e.g. memtrace). * * We initialise the source buffers, then use whichever set of load/store * instructions is under test to copy bytes from the source buffers to the diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index defe488d6bf1..40253abc6208 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ pkey_siginfo stack_expansion_signal stack_expansion_ldst +TEST_PROGS := stress_code_patching.sh TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile diff --git a/tools/testing/selftests/powerpc/mm/stress_code_patching.sh b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh new file mode 100755 index 000000000000..e454509659f6 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +TIMEOUT=30 + +DEBUFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'` +if [ ! -e "$DEBUFS_DIR" ] +then + echo "debugfs not found, skipping" 1>&2 + exit 4 +fi + +if [ ! -e "$DEBUFS_DIR/tracing/current_tracer" ] +then + echo "Tracing files not found, skipping" 1>&2 + exit 4 +fi + + +echo "Testing for spurious faults when mapping kernel memory..." + +if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUFS_DIR/tracing/trace" +then + echo "FAILED: Ftrace already dead. Probably due to a spurious fault" 1>&2 + exit 1 +fi + +dmesg -C +START_TIME=`date +%s` +END_TIME=`expr $START_TIME + $TIMEOUT` +while [ `date +%s` -lt $END_TIME ] +do + echo function > $DEBUFS_DIR/tracing/current_tracer + echo nop > $DEBUFS_DIR/tracing/current_tracer + if dmesg | grep -q 'ftrace bug' + then + break + fi +done + +echo nop > $DEBUFS_DIR/tracing/current_tracer +if dmesg | grep -q 'ftrace bug' +then + echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2 + exit 1 +else + echo "OK: Mapping kernel memory does not cause spurious faults" + exit 0 +fi diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index 02dffb65de48..b099753b50e4 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle) fprintf(stderr, "error: cannot progress; "); fprintf(stderr, "too many faults\n"); exit(-1); - }; + } } fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index af3df79d8163..c5ecb4634094 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -include ../../../../../../scripts/Kbuild.include +include ../../../../../build/Build.include noarg: $(MAKE) -C ../../ diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 0e96150b7c7e..eb75e5360e31 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -14,3 +14,4 @@ perf-hwbreak core-pkey ptrace-pkey ptrace-syscall +ptrace-perf-hwbreak diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 8d3f006c98cc..a500639da97a 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -2,7 +2,7 @@ TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak ptrace-syscall + perf-hwbreak ptrace-syscall ptrace-perf-hwbreak top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index c1f324afdbf3..ecde2c199f3b 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -21,8 +21,13 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> +#include <signal.h> #include <string.h> #include <sys/ioctl.h> +#include <sys/wait.h> +#include <sys/ptrace.h> +#include <sys/sysinfo.h> +#include <asm/ptrace.h> #include <elf.h> #include <pthread.h> #include <sys/syscall.h> @@ -30,32 +35,130 @@ #include <linux/hw_breakpoint.h> #include "utils.h" +#ifndef PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 +#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 +#endif + #define MAX_LOOPS 10000 #define DAWR_LENGTH_MAX ((0x3f + 1) * 8) -static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, - int cpu, int group_fd, - unsigned long flags) +int nprocs; + +static volatile int a = 10; +static volatile int b = 10; +static volatile char c[512 + 8] __attribute__((aligned(512))); + +static void perf_event_attr_set(struct perf_event_attr *attr, + __u32 type, __u64 addr, __u64 len, + bool exclude_user) { - attr->size = sizeof(*attr); - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = type; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; + attr->exclude_guest = 1; + attr->exclude_user = exclude_user; + attr->disabled = 1; } -static inline bool breakpoint_test(int len) +static int +perf_process_event_open_exclude_user(__u32 type, __u64 addr, __u64 len, bool exclude_user) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, exclude_user); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_process_event_open(__u32 type, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_cpu_event_open(long cpu, __u32 type, __u64 addr, __u64 len) { struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static void close_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + close(fd[i]); +} + +static unsigned long read_fds(int *fd, int n) +{ + int i; + unsigned long c = 0; + unsigned long count = 0; + size_t res; + + for (i = 0; i < n; i++) { + res = read(fd[i], &c, sizeof(c)); + assert(res == sizeof(unsigned long long)); + count += c; + } + return count; +} + +static void reset_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_RESET); +} + +static void enable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_ENABLE); +} + +static void disable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_DISABLE); +} + +static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) +{ + int i = 0; + + /* Assume online processors are 0 to nprocs for simplisity */ + for (i = 0; i < nprocs; i++) { + fd[i] = perf_cpu_event_open(i, type, addr, len); + if (fd[i] < 0) { + close_fds(fd, i); + return fd[i]; + } + } + return 0; +} + +static inline bool breakpoint_test(int len) +{ int fd; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = HW_BREAKPOINT_R; /* bp_addr can point anywhere but needs to be aligned */ - attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800; - attr.bp_len = len; - fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + fd = perf_process_event_open(HW_BREAKPOINT_R, (__u64)(&fd) & 0xfffffffffffff800, len); if (fd < 0) return false; close(fd); @@ -75,7 +178,6 @@ static inline bool dawr_supported(void) static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) { int i,j; - struct perf_event_attr attr; size_t res; unsigned long long breaks, needed; int readint; @@ -85,6 +187,7 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) int break_fd; int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */ volatile int *k; + __u64 len; /* align to 0x400 boundary as required by DAWR */ readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) & @@ -94,19 +197,11 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) if (arraytest) ptr = &readintalign[0]; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = readwriteflag; - attr.bp_addr = (__u64)ptr; - attr.bp_len = sizeof(int); - if (arraytest) - attr.bp_len = DAWR_LENGTH_MAX; - attr.exclude_user = exclude_user; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + len = arraytest ? DAWR_LENGTH_MAX : sizeof(int); + break_fd = perf_process_event_open_exclude_user(readwriteflag, (__u64)ptr, + len, exclude_user); if (break_fd < 0) { - perror("sys_perf_event_open"); + perror("perf_process_event_open_exclude_user"); exit(1); } @@ -153,7 +248,6 @@ static int runtest_dar_outside(void) void *target; volatile __u16 temp16; volatile __u64 temp64; - struct perf_event_attr attr; int break_fd; unsigned long long breaks; int fail = 0; @@ -165,21 +259,11 @@ static int runtest_dar_outside(void) exit(EXIT_FAILURE); } - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.exclude_kernel = 1; - attr.exclude_hv = 1; - attr.exclude_guest = 1; - attr.bp_type = HW_BREAKPOINT_RW; /* watch middle half of target array */ - attr.bp_addr = (__u64)(target + 2); - attr.bp_len = 4; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + break_fd = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)(target + 2), 4); if (break_fd < 0) { free(target); - perror("sys_perf_event_open"); + perror("perf_process_event_open"); exit(EXIT_FAILURE); } @@ -263,11 +347,467 @@ static int runtest_dar_outside(void) return fail; } +static void multi_dawr_workload(void) +{ + a += 10; + b += 10; + c[512 + 1] += 'a'; +} + +static int test_process_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int runtest_multi_dawr(void) +{ + int ret = 0; + + ret |= test_process_multi_diff_addr(); + ret |= test_process_multi_same_addr(); + ret |= test_process_multi_diff_addr_ro_wo(); + ret |= test_process_multi_same_addr_ro_wo(); + ret |= test_syswide_multi_diff_addr(); + ret |= test_syswide_multi_same_addr(); + ret |= test_syswide_multi_diff_addr_ro_wo(); + ret |= test_syswide_multi_same_addr_ro_wo(); + + return ret; +} + +static int runtest_unaligned_512bytes(void) +{ + unsigned long long breaks = 0; + int fd; + char *desc = "Process specific, 512 bytes, unaligned"; + __u64 addr = (__u64)&c + 8; + size_t res; + + fd = perf_process_event_open(HW_BREAKPOINT_RW, addr, 512); + if (fd < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd, PERF_EVENT_IOC_RESET); + ioctl(fd, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd, PERF_EVENT_IOC_DISABLE); + + res = read(fd, &breaks, sizeof(breaks)); + assert(res == sizeof(unsigned long long)); + + close(fd); + + if (breaks != 2) { + printf("FAILED: %s: %lld != 2\n", desc, breaks); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +/* There is no perf api to find number of available watchpoints. Use ptrace. */ +static int get_nr_wps(bool *arch_31) +{ + struct ppc_debug_info dbginfo; + int child_pid; + + child_pid = fork(); + if (!child_pid) { + int ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("PTRACE_TRACEME failed\n"); + exit(EXIT_FAILURE); + } + kill(getpid(), SIGUSR1); + + sleep(1); + exit(EXIT_SUCCESS); + } + + wait(NULL); + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo)) { + perror("Can't get breakpoint info"); + exit(EXIT_FAILURE); + } + + *arch_31 = !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31); + return dbginfo.num_data_bps; +} + static int runtest(void) { int rwflag; int exclude_user; int ret; + bool dawr = dawr_supported(); + bool arch_31 = false; + int nr_wps = get_nr_wps(&arch_31); /* * perf defines rwflag as two bits read and write and at least @@ -280,7 +820,7 @@ static int runtest(void) return ret; /* if we have the dawr, we can do an array test */ - if (!dawr_supported()) + if (!dawr) continue; ret = runtestsingle(rwflag, exclude_user, 1); if (ret) @@ -289,6 +829,19 @@ static int runtest(void) } ret = runtest_dar_outside(); + if (ret) + return ret; + + if (dawr && nr_wps > 1) { + nprocs = get_nprocs(); + ret = runtest_multi_dawr(); + if (ret) + return ret; + } + + if (dawr && arch_31) + ret = runtest_unaligned_512bytes(); + return ret; } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 2e0d86e0687e..a0635a3819aa 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -194,6 +194,18 @@ static void test_workload(void) big_var[rand() % DAWR_MAX_LEN] = 'a'; else cvar = big_var[rand() % DAWR_MAX_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + cvar = gstruct.a[rand() % A_LEN]; } static void check_success(pid_t child_pid, const char *name, const char *type, @@ -417,6 +429,69 @@ static void test_sethwdebug_range_aligned(pid_t child_pid) ptrace_delhwdebug(child_pid, wh); } +static void test_multi_sethwdebug_range(pid_t child_pid) +{ + struct ppc_hw_breakpoint info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name1 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED"; + char *name2 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.b; + len1 = A_LEN; + len2 = B_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name1, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name2, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + +static void test_multi_sethwdebug_range_dawr_overlap(pid_t child_pid) +{ + struct ppc_hw_breakpoint info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.a; + len1 = A_LEN; + len2 = A_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + static void test_sethwdebug_range_unaligned(pid_t child_pid) { struct ppc_hw_breakpoint info; @@ -504,6 +579,10 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) test_sethwdebug_range_unaligned(child_pid); test_sethwdebug_range_unaligned_dar(child_pid); test_sethwdebug_dawr_max_range(child_pid); + if (dbginfo->num_data_bps > 1) { + test_multi_sethwdebug_range(child_pid); + test_multi_sethwdebug_range_dawr_overlap(child_pid); + } } } } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c new file mode 100644 index 000000000000..3344e74a97b4 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -0,0 +1,659 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <asm/unistd.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include "ptrace.h" + +char data[16]; + +/* Overlapping address range */ +volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; +volatile __u64 *perf_data1 = (__u64 *)&data[4]; + +/* Non-overlapping address range */ +volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; +volatile __u64 *perf_data2 = (__u64 *)&data[8]; + +static unsigned long pid_max_addr(void) +{ + FILE *fp; + char *line, *c; + char addr[100]; + size_t len = 0; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) { + printf("Failed to read /proc/kallsyms. Exiting..\n"); + exit(EXIT_FAILURE); + } + + while (getline(&line, &len, fp) != -1) { + if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || + strstr(line, "pid_max_min")) + continue; + + strncpy(addr, line, len < 100 ? len : 100); + c = strchr(addr, ' '); + *c = '\0'; + return strtoul(addr, &c, 16); + } + fclose(fp); + printf("Could not find pix_max. Exiting..\n"); + exit(EXIT_FAILURE); + return -1; +} + +static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; +} + +static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = pid_max_addr(); + attr->bp_len = sizeof(unsigned long); + attr->exclude_user = 1; + attr->exclude_hv = 1; +} + +static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); +} + +static int perf_thread_kernel_event_open(pid_t child_pid) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_cpu_kernel_event_open(int cpu) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int child(void) +{ + int ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + printf("Error: PTRACE_TRACEME failed\n"); + return 0; + } + kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ + + return 0; +} + +static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, + __u64 addr, int len) +{ + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = addr; + info->addr2 = addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; +} + +static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +{ + struct ppc_hw_breakpoint info; + + ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); +} + +static int test1(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range overlaps) + * fail; + */ + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test2(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range does not overlaps) + * allow; + */ + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test3(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range overlaps) + * fail; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test4(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range does not overlaps) + * fail; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test5(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the different thread) + * allow; + */ + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); +perf_close: + close(perf_fd); +kill_child: + kill(cpid, SIGINT); + return ret; +} + +static int test6(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread kernel event by perf) + * if (existing thread event by ptrace on the same thread) + * allow; + * -- OR -- + * if (new per cpu kernel event by perf) + * if (existing thread event by ptrace) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_kernel_event_open(child_pid); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + + perf_fd = perf_cpu_kernel_event_open(0); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test7(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test8(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlaps) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test9(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: + kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test10(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test11(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test12(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test13(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test14(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: + kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg) +{ + int ret; + + ret = fun(arg); + if (ret) + printf("%s: Error\n", msg); + else + printf("%s: Ok\n", msg); + return ret; +} + +char *desc[14] = { + "perf cpu event -> ptrace thread event (Overlapping)", + "perf cpu event -> ptrace thread event (Non-overlapping)", + "perf thread event -> ptrace same thread event (Overlapping)", + "perf thread event -> ptrace same thread event (Non-overlapping)", + "perf thread event -> ptrace other thread event", + "ptrace thread event -> perf kernel event", + "ptrace thread event -> perf same thread event (Overlapping)", + "ptrace thread event -> perf same thread event (Non-overlapping)", + "ptrace thread event -> perf other thread event", + "ptrace thread event -> perf cpu event (Overlapping)", + "ptrace thread event -> perf cpu event (Non-overlapping)", + "ptrace thread event -> perf same thread & cpu event (Overlapping)", + "ptrace thread event -> perf same thread & cpu event (Non-overlapping)", + "ptrace thread event -> perf other thread & cpu event", +}; + +static int test(pid_t child_pid) +{ + int ret = TEST_PASS; + + ret |= do_test(desc[0], test1, child_pid); + ret |= do_test(desc[1], test2, child_pid); + ret |= do_test(desc[2], test3, child_pid); + ret |= do_test(desc[3], test4, child_pid); + ret |= do_test(desc[4], test5, child_pid); + ret |= do_test(desc[5], test6, child_pid); + ret |= do_test(desc[6], test7, child_pid); + ret |= do_test(desc[7], test8, child_pid); + ret |= do_test(desc[8], test9, child_pid); + ret |= do_test(desc[9], test10, child_pid); + ret |= do_test(desc[10], test11, child_pid); + ret |= do_test(desc[11], test12, child_pid); + ret |= do_test(desc[12], test13, child_pid); + ret |= do_test(desc[13], test14, child_pid); + + return ret; +} + +static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) +{ + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { + perror("Can't get breakpoint info"); + exit(-1); + } +} + +static int ptrace_perf_hwbreak(void) +{ + int ret; + pid_t child_pid; + struct ppc_debug_info dbginfo; + + child_pid = fork(); + if (!child_pid) + return child(); + + /* parent */ + wait(NULL); /* <-- child (SIGUSR1) */ + + get_dbginfo(child_pid, &dbginfo); + SKIP_IF(dbginfo.num_data_bps <= 1); + + ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + SKIP_IF(ret < 0); + close(ret); + + ret = test(child_pid); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + return ret; +} + +int main(int argc, char *argv[]) +{ + return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak"); +} diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index f25e854fe370..844d18cd5f93 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2 +TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2 top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include @@ -13,3 +13,4 @@ $(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S $(OUTPUT)/rfi_flush: flush_utils.c $(OUTPUT)/entry_flush: flush_utils.c +$(OUTPUT)/uaccess_flush: flush_utils.c diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 78cf914fa321..68ce377b205e 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -53,7 +53,7 @@ int entry_flush_test(void) entry_flush = entry_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c index 0c3c4c40c7fb..4d95965cb751 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.c +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -13,6 +13,7 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> +#include <sys/utsname.h> #include "utils.h" #include "flush_utils.h" @@ -35,6 +36,18 @@ void syscall_loop(char *p, unsigned long iterations, } } +void syscall_loop_uaccess(char *p, unsigned long iterations, + unsigned long zero_size) +{ + struct utsname utsname; + + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + uname(&utsname); + } +} + static void sigill_handler(int signr, siginfo_t *info, void *unused) { static int warned; diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h index 07a5eb301466..e1e68281f7ac 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.h +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -9,9 +9,16 @@ #define CACHELINE_SIZE 128 +#define PERF_L1D_READ_MISS_CONFIG ((PERF_COUNT_HW_CACHE_L1D) | \ + (PERF_COUNT_HW_CACHE_OP_READ << 8) | \ + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)) + void syscall_loop(char *p, unsigned long iterations, unsigned long zero_size); +void syscall_loop_uaccess(char *p, unsigned long iterations, + unsigned long zero_size); + void set_dscr(unsigned long val); #endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */ diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 7565fd786640..f73484a6470f 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -54,7 +54,7 @@ int rfi_flush_test(void) rfi_flush = rfi_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c new file mode 100644 index 000000000000..cf80f960e38a --- /dev/null +++ b/tools/testing/selftests/powerpc/security/uaccess_flush.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + * Copyright 2020 Canonical Ltd. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include <sys/types.h> +#include <stdint.h> +#include <malloc.h> +#include <unistd.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "utils.h" +#include "flush_utils.h" + +int uaccess_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_orig; + int entry_flush_orig; + int uaccess_flush, uaccess_flush_orig; + + SKIP_IF(geteuid() != 0); + + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (rfi_flush_orig != 0) { + if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + FAIL_IF(1); + } + } + + if (entry_flush_orig != 0) { + if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + FAIL_IF(1); + } + } + + uaccess_flush = uaccess_flush_orig; + + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + // disable L1 prefetching + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when entry_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop_uaccess(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (uaccess_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!uaccess_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d failures]\n", + uaccess_flush, l1d_misses_total, uaccess_flush ? '<' : '>', + uaccess_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else { + printf("PASS (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d pass]\n", + uaccess_flush, l1d_misses_total, uaccess_flush ? '>' : '<', + uaccess_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + } + + if (uaccess_flush == uaccess_flush_orig) { + uaccess_flush = !uaccess_flush_orig; + if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush) < 0) { + perror("error writing to powerpc/uaccess_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush_orig) < 0) { + perror("unable to restore original value of powerpc/uaccess_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(uaccess_flush_test, "uaccess_flush_test"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c index c75960af8018..11521077f915 100644 --- a/tools/testing/selftests/powerpc/tm/tm-trap.c +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -66,7 +66,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc) /* Get thread endianness: extract bit LE from MSR */ thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR]; - /*** + /* * Little-Endian Machine */ @@ -126,7 +126,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc) } } - /*** + /* * Big-Endian Machine */ diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh index 1dbfb62567d2..6bb993001680 100755 --- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh +++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh @@ -21,7 +21,6 @@ then awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'` else # No mpstat command, so use all available CPUs. - echo The mpstat command is not available, so greedily using all CPUs. idlecpus=$ncpus fi awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null ' diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index 188b864bc4bf..15d937ba96ca 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -5,10 +5,11 @@ # of this script is to inflict random OS jitter on a concurrently running # test. # -# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ] +# Usage: jitter.sh me jittering-path duration [ sleepmax [ spinmax ] ] # # me: Random-number-generator seed salt. # duration: Time to run in seconds. +# jittering-path: Path to file whose removal will stop this script. # sleepmax: Maximum microseconds to sleep, defaults to one second. # spinmax: Maximum microseconds to spin, defaults to one millisecond. # @@ -17,9 +18,10 @@ # Authors: Paul E. McKenney <paulmck@linux.ibm.com> me=$(($1 * 1000)) -duration=$2 -sleepmax=${3-1000000} -spinmax=${4-1000} +jittering=$2 +duration=$3 +sleepmax=${4-1000000} +spinmax=${5-1000} n=1 @@ -47,7 +49,7 @@ do fi # Check for stop request. - if test -f "$TORTURE_STOPFILE" + if ! test -f "$jittering" then exit 1; fi @@ -67,10 +69,10 @@ do srand(n + me + systime()); ncpus = split(cpus, ca); curcpu = ca[int(rand() * ncpus + 1)]; - mask = lshift(1, curcpu); - if (mask + 0 <= 0) - mask = 1; - printf("%#x\n", mask); + z = ""; + for (i = 1; 4 * i <= curcpu; i++) + z = z "0"; + print "0x" 2 ^ (curcpu % 4) z; }' < /dev/null` n=$(($n+1)) if ! taskset -p $cpumask $$ > /dev/null 2>&1 diff --git a/tools/testing/selftests/rcutorture/bin/jitterstart.sh b/tools/testing/selftests/rcutorture/bin/jitterstart.sh new file mode 100644 index 000000000000..3d710ad291c3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/jitterstart.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Start up the specified number of jitter.sh scripts in the background. +# +# Usage: . jitterstart.sh n jittering-dir duration [ sleepmax [ spinmax ] ] +# +# n: Number of jitter.sh scripts to start up. +# jittering-dir: Directory in which to put "jittering" file. +# duration: Time to run in seconds. +# sleepmax: Maximum microseconds to sleep, defaults to one second. +# spinmax: Maximum microseconds to spin, defaults to one millisecond. +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +jitter_n=$1 +if test -z "$jitter_n" +then + echo jitterstart.sh: Missing count of jitter.sh scripts to start. + exit 33 +fi +jittering_dir=$2 +if test -z "$jittering_dir" +then + echo jitterstart.sh: Missing directory in which to place jittering file. + exit 34 +fi +shift +shift + +touch ${jittering_dir}/jittering +for ((jitter_i = 1; jitter_i <= $jitter_n; jitter_i++)) +do + jitter.sh $jitter_i "${jittering_dir}/jittering" "$@" & +done diff --git a/tools/testing/selftests/rcutorture/bin/jitterstop.sh b/tools/testing/selftests/rcutorture/bin/jitterstop.sh new file mode 100644 index 000000000000..576a4cf4b79a --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/jitterstop.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Remove the "jittering" file, signaling the jitter.sh scripts to stop, +# then wait for them to terminate. +# +# Usage: . jitterstop.sh jittering-dir +# +# jittering-dir: Directory containing "jittering" file. +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +jittering_dir=$1 +if test -z "$jittering_dir" +then + echo jitterstop.sh: Missing directory in which to place jittering file. + exit 34 +fi + +rm -f ${jittering_dir}/jittering +wait diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh new file mode 100755 index 000000000000..46e47a00a7db --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -0,0 +1,199 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Rerun a series of tests under KVM. +# +# Usage: kvm-again.sh /path/to/old/run [ options ] +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +scriptname=$0 +args="$*" + +T=${TMPDIR-/tmp}/kvm-again.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T + +if ! test -d tools/testing/selftests/rcutorture/bin +then + echo $scriptname must be run from top-level directory of kernel source tree. + exit 1 +fi + +oldrun=$1 +shift +if ! test -d "$oldrun" +then + echo "Usage: $scriptname /path/to/old/run [ options ]" + exit 1 +fi +if ! cp "$oldrun/batches" $T/batches.oldrun +then + # Later on, can reconstitute this from console.log files. + echo Prior run batches file does not exist: $oldrun/batches + exit 1 +fi + +if test -f "$oldrun/torture_suite" +then + torture_suite="`cat $oldrun/torture_suite`" +elif test -f "$oldrun/TORTURE_SUITE" +then + torture_suite="`cat $oldrun/TORTURE_SUITE`" +else + echo "Prior run torture_suite file does not exist: $oldrun/{torture_suite,TORTURE_SUITE}" + exit 1 +fi + +KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM +PATH=${KVM}/bin:$PATH; export PATH +. functions.sh + +dryrun= +dur= +default_link="cp -R" +rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`" + +startdate="`date`" +starttime="`get_starttime`" + +usage () { + echo "Usage: $scriptname $oldrun [ arguments ]:" + echo " --dryrun" + echo " --duration minutes | <seconds>s | <hours>h | <days>d" + echo " --link hard|soft|copy" + echo " --remote" + echo " --rundir /new/res/path" + exit 1 +} + +while test $# -gt 0 +do + case "$1" in + --dryrun) + dryrun=1 + ;; + --duration) + checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error' + mult=60 + if echo "$2" | grep -q 's$' + then + mult=1 + elif echo "$2" | grep -q 'h$' + then + mult=3600 + elif echo "$2" | grep -q 'd$' + then + mult=86400 + fi + ts=`echo $2 | sed -e 's/[smhd]$//'` + dur=$(($ts*mult)) + shift + ;; + --link) + checkarg --link "hard|soft|copy" "$#" "$2" 'hard\|soft\|copy' '^--' + case "$2" in + copy) + arg_link="cp -R" + ;; + hard) + arg_link="cp -Rl" + ;; + soft) + arg_link="cp -Rs" + ;; + esac + shift + ;; + --remote) + arg_remote=1 + default_link="cp -as" + ;; + --rundir) + checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error' + rundir=$2 + if test -e "$rundir" + then + echo "--rundir $2: Already exists." + usage + fi + shift + ;; + *) + echo Unknown argument $1 + usage + ;; + esac + shift +done +if test -z "$arg_link" +then + arg_link="$default_link" +fi + +echo ---- Re-run results directory: $rundir + +# Copy old run directory tree over and adjust. +mkdir -p "`dirname "$rundir"`" +if ! $arg_link "$oldrun" "$rundir" +then + echo "Cannot copy from $oldrun to $rundir." + usage +fi +rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log +echo $oldrun > "$rundir/re-run" +if ! test -d "$rundir/../../bin" +then + $arg_link "$oldrun/../../bin" "$rundir/../.." +fi +for i in $rundir/*/qemu-cmd +do + cp "$i" $T + qemu_cmd_dir="`dirname "$i"`" + kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`" + jitter_dir="`dirname "$kernel_dir"`" + kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i + if test -n "$arg_remote" + then + echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i + fi +done + +# Extract settings from the last qemu-cmd file transformed above. +grep '^#' $i | sed -e 's/^# //' > $T/qemu-cmd-settings +. $T/qemu-cmd-settings + +grep -v '^#' $T/batches.oldrun | awk ' +BEGIN { + oldbatch = 1; +} + +{ + if (oldbatch != $1) { + print "kvm-test-1-run-batch.sh" curbatch; + curbatch = ""; + oldbatch = $1; + } + curbatch = curbatch " " $2; +} + +END { + print "kvm-test-1-run-batch.sh" curbatch +}' > $T/runbatches.sh + +if test -n "$dryrun" +then + echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log" +else + ( cd "$rundir"; sh $T/runbatches.sh ) + kcsan-collapse.sh "$rundir" | tee -a "$rundir/log" + echo | tee -a "$rundir/log" + echo ---- Results directory: $rundir | tee -a "$rundir/log" + kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1 + ret=$? + cat $T/kvm-recheck.sh.out | tee -a "$rundir/log" + echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log" + exit $ret +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 47cf4db10896..e01b31b87044 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -30,7 +30,7 @@ do resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` head -1 $resdir/log fi - TORTURE_SUITE="`cat $i/../TORTURE_SUITE`" + TORTURE_SUITE="`cat $i/../torture_suite`" configfile=`echo $i | sed -e 's,^.*/,,'` rm -f $i/console.log.*.diags kvm-recheck-${TORTURE_SUITE}.sh $i diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh new file mode 100755 index 000000000000..7ea0809e229e --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Carry out a kvm-based run for the specified batch of scenarios, which +# might have been built by --build-only kvm.sh run. +# +# Usage: kvm-test-1-run-batch.sh SCENARIO [ SCENARIO ... ] +# +# Each SCENARIO is the name of a directory in the current directory +# containing a ready-to-run qemu-cmd file. +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T + +echo ---- Running batch $* +# Check arguments +runfiles= +for i in "$@" +do + if ! echo $i | grep -q '^[^/.a-z]\+\(\.[0-9]\+\)\?$' + then + echo Bad scenario name: \"$i\" 1>&2 + exit 1 + fi + if ! test -d "$i" + then + echo Scenario name not a directory: \"$i\" 1>&2 + exit 2 + fi + if ! test -f "$i/qemu-cmd" + then + echo Scenario lacks a command file: \"$i/qemu-cmd\" 1>&2 + exit 3 + fi + rm -f $i/build.* + touch $i/build.run + runfiles="$runfiles $i/build.run" +done + +# Extract settings from the qemu-cmd file. +grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings +. $T/qemu-cmd-settings + +# Start up jitter, start each scenario, wait, end jitter. +echo ---- System running test: `uname -a` +echo ---- Starting kernels. `date` | tee -a log +$TORTURE_JITTER_START +for i in "$@" +do + echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out + echo > $i/kvm-test-1-run-qemu.sh.out + kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 & +done +for i in $runfiles +do + while ls $i > /dev/null 2>&1 + do + : + done +done +echo ---- All kernel runs complete. `date` | tee -a log +$TORTURE_JITTER_STOP diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh new file mode 100755 index 000000000000..5b1aa2a4f3f6 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Carry out a kvm-based run for the specified qemu-cmd file, which might +# have been generated by --build-only kvm.sh run. +# +# Usage: kvm-test-1-run-qemu.sh qemu-cmd-dir +# +# qemu-cmd-dir provides the directory containing qemu-cmd file. +# This is assumed to be of the form prefix/ds/scenario, where +# "ds" is the top-level date-stamped directory and "scenario" +# is the scenario name. Any required adjustments to this file +# must have been made by the caller. The shell-command comments +# at the end of the qemu-cmd file are not optional. +# +# Copyright (C) 2021 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T + +resdir="$1" +if ! test -d "$resdir" +then + echo $0: Nonexistent directory: $resdir + exit 1 +fi +if ! test -f "$resdir/qemu-cmd" +then + echo $0: Nonexistent qemu-cmd file: $resdir/qemu-cmd + exit 1 +fi + +echo ' ---' `date`: Starting kernel, PID $$ + +# Obtain settings from the qemu-cmd file. +grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings +. $T/qemu-cmd-settings + +# Decorate qemu-cmd with redirection, backgrounding, and PID capture +sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd +echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd + +# In case qemu refuses to run... +echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log + +# Attempt to run qemu +kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` +( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & +commandcompleted=0 +if test -z "$TORTURE_KCONFIG_GDB_ARG" +then + sleep 10 # Give qemu's pid a chance to reach the file + if test -s "$resdir/qemu_pid" + then + qemu_pid=`cat "$resdir/qemu_pid"` + echo Monitoring qemu job at pid $qemu_pid + else + qemu_pid="" + echo Monitoring qemu job at yet-as-unknown pid + fi +fi +if test -n "$TORTURE_KCONFIG_GDB_ARG" +then + base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` + if ! test -f $base_resdir/vmlinux + then + base_resdir="`cat re-run`/$resdir" + if ! test -f $base_resdir/vmlinux + then + base_resdir=/path/to + fi + fi + echo Waiting for you to attach a debug session, for example: > /dev/tty + echo " gdb $base_resdir/vmlinux" > /dev/tty + echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty + echo " target remote :1234" > /dev/tty + echo " continue" > /dev/tty + kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` +fi +while : +do + if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" + then + qemu_pid=`cat "$resdir/qemu_pid"` + fi + kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` + if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 + then + if test -n "$TORTURE_KCONFIG_GDB_ARG" + then + : + elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1" + then + break; + fi + sleep 1 + else + commandcompleted=1 + if test $kruntime -lt $seconds + then + echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 + grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1 + killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`" + if test -n "$killpid" + then + echo "ps -fp $killpid" >> $resdir/Warnings 2>&1 + ps -fp $killpid >> $resdir/Warnings 2>&1 + fi + else + echo ' ---' `date`: "Kernel done" + fi + break + fi +done +if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" +then + qemu_pid=`cat "$resdir/qemu_pid"` +fi +if test $commandcompleted -eq 0 -a -n "$qemu_pid" +then + if ! test -f "$resdir/../STOP.1" + then + echo Grace period for qemu job at pid $qemu_pid + fi + oldline="`tail $resdir/console.log`" + while : + do + if test -f "$resdir/../STOP.1" + then + echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1 + kill -KILL $qemu_pid + break + fi + kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` + if kill -0 $qemu_pid > /dev/null 2>&1 + then + : + else + break + fi + must_continue=no + newline="`tail $resdir/console.log`" + if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : ' + then + must_continue=yes + fi + last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`" + if test -z "$last_ts" + then + last_ts=0 + fi + if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) + then + must_continue=yes + fi + if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) + then + echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 + kill -KILL $qemu_pid + break + fi + oldline=$newline + sleep 10 + done +elif test -z "$qemu_pid" +then + echo Unknown PID, cannot kill qemu command +fi + +# Tell the script that this run is done. +rm -f $resdir/build.run + +parse-console.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 536d103ef166..420ed5ce9d32 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -7,15 +7,15 @@ # Execute this in the source tree. Do not run it as a background task # because qemu does not seem to like that much. # -# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args +# Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in # # qemu-args defaults to "-enable-kvm -nographic", along with arguments # specifying the number of CPUs and other options # generated from the underlying CPU architecture. -# boot_args defaults to value returned by the per_version_boot_params +# boot_args_in defaults to value returned by the per_version_boot_params # shell function. # -# Anything you specify for either qemu-args or boot_args is appended to +# Anything you specify for either qemu-args or boot_args_in is appended to # the default values. The "-smp" value is deduced from the contents of # the config fragment. # @@ -35,14 +35,13 @@ mkdir $T config_template=${1} config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'` title=`echo $config_template | sed -e 's/^.*\///'` -builddir=${2} -resdir=${3} +resdir=${2} if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir" then echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it" exit 1 fi -echo ' ---' `date`: Starting build +echo ' ---' `date`: Starting build, PID $$ echo ' ---' Kconfig fragment at: $config_template >> $resdir/log touch $resdir/ConfigFragment.input @@ -73,7 +72,7 @@ config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG" cp $T/KcList $resdir/ConfigFragment base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` -if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux +if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux then # Rerunning previous test, so use that test's kernel. QEMU="`identify_qemu $base_resdir/vmlinux`" @@ -83,6 +82,17 @@ then ln -s $base_resdir/.config $resdir # for kvm-recheck.sh # Arch-independent indicator touch $resdir/builtkernel +elif test "$base_resdir" != "$resdir" +then + # Rerunning previous test for which build failed + ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh + ln -s $base_resdir/.config $resdir # for kvm-recheck.sh + echo Initial build failed, not running KVM, see $resdir. + if test -f $resdir/build.wait + then + mv $resdir/build.wait $resdir/build.ready + fi + exit 1 elif kvm-build.sh $T/KcList $resdir then # Had to build a kernel for this test. @@ -107,23 +117,23 @@ else # Build failed. cp .config $resdir || : echo Build failed, not running KVM, see $resdir. - if test -f $builddir.wait + if test -f $resdir/build.wait then - mv $builddir.wait $builddir.ready + mv $resdir/build.wait $resdir/build.ready fi exit 1 fi -if test -f $builddir.wait +if test -f $resdir/build.wait then - mv $builddir.wait $builddir.ready + mv $resdir/build.wait $resdir/build.ready fi -while test -f $builddir.ready +while test -f $resdir/build.ready do sleep 1 done -seconds=$4 -qemu_args=$5 -boot_args=$6 +seconds=$3 +qemu_args=$4 +boot_args_in=$5 if test -z "$TORTURE_BUILDONLY" then @@ -133,7 +143,7 @@ fi # Generate -smp qemu argument. qemu_args="-enable-kvm -nographic $qemu_args" cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment` -cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` +cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"` if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS" then echo CPU count limited from $cpu_count to $TORTURE_ALLOTED_CPUS | tee -a $resdir/Warnings @@ -149,16 +159,52 @@ qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`" qemu_append="`identify_qemu_append "$QEMU"`" # Pull in Kconfig-fragment boot parameters -boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" +boot_args="`configfrag_boot_params "$boot_args_in" "$config_template"`" # Generate kernel-version-specific boot parameters boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" if test -n "$TORTURE_BOOT_GDB_ARG" then boot_args="$boot_args $TORTURE_BOOT_GDB_ARG" fi + +# Give bare-metal advice +modprobe_args="`echo $boot_args | tr -s ' ' '\012' | grep "^$TORTURE_MOD\." | sed -e "s/$TORTURE_MOD\.//g"`" +kboot_args="`echo $boot_args | tr -s ' ' '\012' | grep -v "^$TORTURE_MOD\."`" +testid_txt="`dirname $resdir`/testid.txt" +touch $resdir/bare-metal +echo To run this scenario on bare metal: >> $resdir/bare-metal +echo >> $resdir/bare-metal +echo " 1." Set your bare-metal build tree to the state shown in this file: >> $resdir/bare-metal +echo " " $testid_txt >> $resdir/bare-metal +echo " 2." Update your bare-metal build tree"'"s .config based on this file: >> $resdir/bare-metal +echo " " $resdir/ConfigFragment >> $resdir/bare-metal +echo " 3." Make the bare-metal kernel"'"s build system aware of your .config updates: >> $resdir/bare-metal +echo " " $ 'yes "" | make oldconfig' >> $resdir/bare-metal +echo " 4." Build your bare-metal kernel. >> $resdir/bare-metal +echo " 5." Boot your bare-metal kernel with the following parameters: >> $resdir/bare-metal +echo " " $kboot_args >> $resdir/bare-metal +echo " 6." Start the test with the following command: >> $resdir/bare-metal +echo " " $ modprobe $TORTURE_MOD $modprobe_args >> $resdir/bare-metal +echo " 7." After some time, end the test with the following command: >> $resdir/bare-metal +echo " " $ rmmod $TORTURE_MOD >> $resdir/bare-metal +echo " 8." Copy your bare-metal kernel"'"s .config file, overwriting this file: >> $resdir/bare-metal +echo " " $resdir/.config >> $resdir/bare-metal +echo " 9." Copy the console output from just before the modprobe to just after >> $resdir/bare-metal +echo " " the rmmod into this file: >> $resdir/bare-metal +echo " " $resdir/console.log >> $resdir/bare-metal +echo "10." Check for runtime errors using the following command: >> $resdir/bare-metal +echo " " $ tools/testing/selftests/rcutorture/bin/kvm-recheck.sh `dirname $resdir` >> $resdir/bare-metal +echo >> $resdir/bare-metal +echo Some of the above steps may be skipped if you build your bare-metal >> $resdir/bare-metal +echo kernel here: `head -n 1 $testid_txt | sed -e 's/^Build directory: //'` >> $resdir/bare-metal + echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd echo "# TORTURE_SHUTDOWN_GRACE=$TORTURE_SHUTDOWN_GRACE" >> $resdir/qemu-cmd echo "# seconds=$seconds" >> $resdir/qemu-cmd +echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cmd +echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd +echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd +echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd if test -n "$TORTURE_BUILDONLY" then @@ -167,140 +213,4 @@ then exit 0 fi -# Decorate qemu-cmd with redirection, backgrounding, and PID capture -sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd -echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd - -# In case qemu refuses to run... -echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log - -# Attempt to run qemu -kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` -( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & -commandcompleted=0 -if test -z "$TORTURE_KCONFIG_GDB_ARG" -then - sleep 10 # Give qemu's pid a chance to reach the file - if test -s "$resdir/qemu_pid" - then - qemu_pid=`cat "$resdir/qemu_pid"` - echo Monitoring qemu job at pid $qemu_pid - else - qemu_pid="" - echo Monitoring qemu job at yet-as-unknown pid - fi -fi -if test -n "$TORTURE_KCONFIG_GDB_ARG" -then - echo Waiting for you to attach a debug session, for example: > /dev/tty - echo " gdb $base_resdir/vmlinux" > /dev/tty - echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty - echo " target remote :1234" > /dev/tty - echo " continue" > /dev/tty - kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` -fi -while : -do - if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" - then - qemu_pid=`cat "$resdir/qemu_pid"` - fi - kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` - if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 - then - if test -n "$TORTURE_KCONFIG_GDB_ARG" - then - : - elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1" - then - break; - fi - sleep 1 - else - commandcompleted=1 - if test $kruntime -lt $seconds - then - echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 - grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1 - killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`" - if test -n "$killpid" - then - echo "ps -fp $killpid" >> $resdir/Warnings 2>&1 - ps -fp $killpid >> $resdir/Warnings 2>&1 - fi - # Reduce probability of PID reuse by allowing a one-minute buffer - if test $((kruntime + 60)) -lt $seconds && test -s "$resdir/../jitter_pids" - then - awk < "$resdir/../jitter_pids" ' - NF > 0 { - pidlist = pidlist " " $1; - n++; - } - END { - if (n > 0) { - print "kill " pidlist; - } - }' | sh - fi - else - echo ' ---' `date`: "Kernel done" - fi - break - fi -done -if test -z "$qemu_pid" -a -s "$resdir/qemu_pid" -then - qemu_pid=`cat "$resdir/qemu_pid"` -fi -if test $commandcompleted -eq 0 -a -n "$qemu_pid" -then - if ! test -f "$resdir/../STOP.1" - then - echo Grace period for qemu job at pid $qemu_pid - fi - oldline="`tail $resdir/console.log`" - while : - do - if test -f "$resdir/../STOP.1" - then - echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1 - kill -KILL $qemu_pid - break - fi - kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` - if kill -0 $qemu_pid > /dev/null 2>&1 - then - : - else - break - fi - must_continue=no - newline="`tail $resdir/console.log`" - if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : ' - then - must_continue=yes - fi - last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`" - if test -z "$last_ts" - then - last_ts=0 - fi - if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) - then - must_continue=yes - fi - if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) - then - echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 - kill -KILL $qemu_pid - break - fi - oldline=$newline - sleep 10 - done -elif test -z "$qemu_pid" -then - echo Unknown PID, cannot kill qemu command -fi - -parse-console.sh $resdir/console.log $title +kvm-test-1-run-qemu.sh $resdir diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh index c45a953ef393..d40b4e60a50c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh @@ -3,7 +3,7 @@ # # Transform a qemu-cmd file to allow reuse. # -# Usage: kvm-transform.sh bzImage console.log < qemu-cmd-in > qemu-cmd-out +# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out # # bzImage: Kernel and initrd from the same prior kvm.sh run. # console.log: File into which to place console output. @@ -29,20 +29,62 @@ then echo "Need console log file name." exit 1 fi +jitter_dir="$3" +if test -z "$jitter_dir" || ! test -d "$jitter_dir" +then + echo "Need valid jitter directory: '$jitter_dir'" + exit 1 +fi +seconds="$4" +if test -n "$seconds" && echo $seconds | grep -q '[^0-9]' +then + echo "Invalid duration, should be numeric in seconds: '$seconds'" + exit 1 +fi + +awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \ + -v seconds="$seconds" ' +/^# seconds=/ { + if (seconds == "") + print $0; + else + print "# seconds=" seconds; + next; +} + +/^# TORTURE_JITTER_START=/ { + print "# TORTURE_JITTER_START=\". jitterstart.sh " $4 " " jitter_dir " " $6 " " $7; + next; +} + +/^# TORTURE_JITTER_STOP=/ { + print "# TORTURE_JITTER_STOP=\". jitterstop.sh " " " jitter_dir " " $5; + next; +} + +/^#/ { + print $0; + next; +} -awk -v image="$image" -v consolelog="$consolelog" ' { line = ""; for (i = 1; i <= NF; i++) { - if (line == "") + if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) { + sub(/[0-9]*$/, seconds, $i); + if (line == "") + line = $i; + else + line = line " " $i; + } else if (line == "") { line = $i; - else + } else { line = line " " $i; + } if ($i == "-serial") { i++; line = line " file:" consolelog; - } - if ($i == "-kernel") { + } else if ($i == "-kernel") { i++; line = line " " image; } diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 8d3c99b35e06..6bf00a003d3d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -29,17 +29,21 @@ PATH=${KVM}/bin:$PATH; export PATH TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" +TORTURE_BUILDONLY= TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" TORTURE_KCONFIG_GDB_ARG="" TORTURE_BOOT_GDB_ARG="" TORTURE_QEMU_GDB_ARG="" +TORTURE_JITTER_START="" +TORTURE_JITTER_STOP="" TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" TORTURE_QEMU_MEM=512 TORTURE_SHUTDOWN_GRACE=180 TORTURE_SUITE=rcu +TORTURE_MOD=rcutorture TORTURE_TRUST_MAKE="" resdir="" configs="" @@ -100,7 +104,7 @@ do TORTURE_BUILDONLY=1 ;; --configs|--config) - checkarg --configs "(list of config files)" "$#" "$2" '^[^/]\+$' '^--' + checkarg --configs "(list of config files)" "$#" "$2" '^[^/.a-z]\+$' '^--' configs="$configs $2" shift ;; @@ -116,7 +120,7 @@ do shift ;; --datestamp) - checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._-/]*$' '^--' + checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--' ds=$2 shift ;; @@ -215,6 +219,7 @@ do --torture) checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--' TORTURE_SUITE=$2 + TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`" shift if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale then @@ -381,6 +386,7 @@ TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG +TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC @@ -399,12 +405,17 @@ echo Results directory: $resdir/$ds echo $scriptname $args touch $resdir/$ds/log echo $scriptname $args >> $resdir/$ds/log -echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE -pwd > $resdir/$ds/testid.txt +echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite +echo Build directory: `pwd` > $resdir/$ds/testid.txt if test -d .git then + echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt + echo >> $resdir/$ds/testid.txt + echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt git status >> $resdir/$ds/testid.txt - git rev-parse HEAD >> $resdir/$ds/testid.txt + echo >> $resdir/$ds/testid.txt + echo >> $resdir/$ds/testid.txt + echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt git diff HEAD >> $resdir/$ds/testid.txt fi ___EOF___ @@ -434,8 +445,17 @@ function dump(first, pastlast, batchnum) print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log"; print "needqemurun=" jn=1 + njitter = 0; + split(jitter, ja); + if (ja[1] == -1 && ncpus == 0) + njitter = 1; + else if (ja[1] == -1) + njitter = ncpus; + else + njitter = ja[1]; + print "TORTURE_JITTER_START=\". jitterstart.sh " njitter " " rd " " dur " " ja[2] " " ja[3] "\"; export TORTURE_JITTER_START"; + print "TORTURE_JITTER_STOP=\". jitterstop.sh " rd " \"; export TORTURE_JITTER_STOP" for (j = first; j < pastlast; j++) { - builddir=KVM "/b" j - first + 1 cpusr[jn] = cpus[j]; if (cfrep[cf[j]] == "") { cfr[jn] = cf[j]; @@ -444,15 +464,15 @@ function dump(first, pastlast, batchnum) cfrep[cf[j]]++; cfr[jn] = cf[j] "." cfrep[cf[j]]; } + builddir=rd cfr[jn] "/build"; if (cpusr[jn] > ncpus && ncpus != 0) ovf = "-ovf"; else ovf = ""; print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log"; - print "rm -f " builddir ".*"; - print "touch " builddir ".wait"; print "mkdir " rd cfr[jn] " || :"; - print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" + print "touch " builddir ".wait"; + print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log"; print "while test -f " builddir ".wait" print "do" @@ -461,23 +481,21 @@ function dump(first, pastlast, batchnum) print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log"; jn++; } + print "runfiles=" for (j = 1; j < jn; j++) { - builddir=KVM "/b" j - print "rm -f " builddir ".ready" + builddir=rd cfr[j] "/build"; + if (TORTURE_BUILDONLY) + print "rm -f " builddir ".ready" + else + print "mv " builddir ".ready " builddir ".run" + print "runfiles=\"$runfiles " builddir ".run\"" + fi print "if test -f \"" rd cfr[j] "/builtkernel\"" print "then" print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log"; print "\tneedqemurun=1" print "fi" } - njitter = 0; - split(jitter, ja); - if (ja[1] == -1 && ncpus == 0) - njitter = 1; - else if (ja[1] == -1) - njitter = ncpus; - else - njitter = ja[1]; if (TORTURE_BUILDONLY && njitter != 0) { njitter = 0; print "echo Build-only run, so suppressing jitter | tee -a " rd "log" @@ -488,19 +506,18 @@ function dump(first, pastlast, batchnum) print "if test -n \"$needqemurun\"" print "then" print "\techo ---- Starting kernels. `date` | tee -a " rd "log"; - print "\techo > " rd "jitter_pids" - for (j = 0; j < njitter; j++) { - print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&" - print "\techo $! >> " rd "jitter_pids" - } - print "\twait" + print "\t$TORTURE_JITTER_START"; + print "\twhile ls $runfiles > /dev/null 2>&1" + print "\tdo" + print "\t\t:" + print "\tdone" + print "\t$TORTURE_JITTER_STOP"; print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log"; print "else" print "\twait" print "\techo ---- No kernel runs. `date` | tee -a " rd "log"; print "fi" for (j = 1; j < jn; j++) { - builddir=KVM "/b" j print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log"; print "cat " rd cfr[j] "/kvm-test-1-run.sh.out | tee -a " rd "log"; } @@ -548,6 +565,18 @@ echo 'ret=$?' >> $T/script echo "cat $T/kvm-recheck.sh.out | tee -a $resdir/$ds/log" >> $T/script echo 'exit $ret' >> $T/script +# Extract the tests and their batches from the script. +egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" | + sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' | + awk ' + /^----Start/ { + batchno = $3; + next; + } + { + print batchno, $1, $2 + }' > $T/batches + if test "$dryrun" = script then cat $T/script @@ -566,21 +595,14 @@ then exit 0 elif test "$dryrun" = batches then - # Extract the tests and their batches from the script. - egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" | - sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' | - awk ' - /^----Start/ { - batchno = $3; - next; - } - { - print batchno, $1, $2 - }' + cat $T/batches + exit 0 else - # Not a dryrun, so run the script. + # Not a dryrun. Record the batches and the number of CPUs, then run the script. bash $T/script ret=$? + cp $T/batches $resdir/$ds/batches + echo '#' cpus=$cpus >> $resdir/$ds/batches echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a $resdir/$ds/log exit $ret fi diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index ad7525b7ac29..56e2e1a42569 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -374,7 +374,7 @@ done if test "$do_kvfree" = "yes" then torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot" - torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make fi echo " --- " $scriptname $args diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index f2b20db9e296..98b6175e5aa0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -7,8 +7,8 @@ TREE07 TREE09 SRCU-N SRCU-P -SRCU-t -SRCU-u +SRCU-T +SRCU-U TINY01 TINY02 TASKS01 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T index d6557c38dfe4..d6557c38dfe4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot index 238bfe3bd0cc..238bfe3bd0cc 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U index 6bc24e99862f..6bc24e99862f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot index ce48c7b82673..ce48c7b82673 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 1c218944b1e9..64f864f1f361 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -4,3 +4,4 @@ rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 threadirqs +tree.use_softirq=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot index 5adc6756792a..a8d94caf7d2f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot @@ -1 +1 @@ -rcutree.rcu_fanout_leaf=4 nohz_full=1-7 +rcutree.rcu_fanout_leaf=4 nohz_full=1-N diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot index 22478fd3a865..94d38445d393 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot @@ -1,3 +1,3 @@ rcupdate.rcu_self_test=1 rcutree.rcu_fanout_exact=1 -rcu_nocbs=0-7 +rcu_nocbs=all diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh index 0333e9b18522..ffbe15109f0d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh @@ -12,5 +12,5 @@ # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { echo $1 rcuscale.shutdown=1 \ - rcuscale.verbose=1 + rcuscale.verbose=0 } diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh index 321e82641287..f81fa2c541a6 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh @@ -12,5 +12,5 @@ # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { echo $1 refscale.shutdown=1 \ - refscale.verbose=1 + refscale.verbose=0 } diff --git a/tools/testing/selftests/resctrl/.gitignore b/tools/testing/selftests/resctrl/.gitignore new file mode 100644 index 000000000000..ab68442b6bc8 --- /dev/null +++ b/tools/testing/selftests/resctrl/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +resctrl_tests diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index d585cc1948cc..6bcee2ec91a9 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -1,5 +1,5 @@ CC = $(CROSS_COMPILE)gcc -CFLAGS = -g -Wall +CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 SRCS=$(wildcard *.c) OBJS=$(SRCS:.c=.o) diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README index 6e5a0ffa18e8..4b36b25b6ac0 100644 --- a/tools/testing/selftests/resctrl/README +++ b/tools/testing/selftests/resctrl/README @@ -46,8 +46,8 @@ ARGUMENTS Parameter '-h' shows usage information. usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits] - -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM default benchmark is builtin fill_buf - -t test list: run tests specified in the test list, e.g. -t mbm, mba, cqm, cat + -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT default benchmark is builtin fill_buf + -t test list: run tests specified in the test list, e.g. -t mbm, mba, cmt, cat -n no_of_bits: run cache tests using specified no of bits in cache bit mask -p cpu_no: specify CPU number to run the test. 1 is default -h: help diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index 38dbf4962e33..68ff856d36f0 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -111,7 +111,7 @@ static int get_llc_perf(unsigned long *llc_perf_miss) /* * Get LLC Occupancy as reported by RESCTRL FS - * For CQM, + * For CMT, * 1. If con_mon grp and mon grp given, then read from mon grp in * con_mon grp * 2. If only con_mon grp given, then read from con_mon grp @@ -182,7 +182,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) /* * Measure cache miss from perf. */ - if (!strcmp(param->resctrl_val, "cat")) { + if (!strncmp(param->resctrl_val, CAT_STR, sizeof(CAT_STR))) { ret = get_llc_perf(&llc_perf_miss); if (ret < 0) return ret; @@ -192,7 +192,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) /* * Measure llc occupancy from resctrl. */ - if (!strcmp(param->resctrl_val, "cqm")) { + if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) { ret = get_llc_occu_resctrl(&llc_occu_resc); if (ret < 0) return ret; @@ -234,7 +234,7 @@ int cat_val(struct resctrl_val_param *param) if (ret) return ret; - if ((strcmp(resctrl_val, "cat") == 0)) { + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { ret = initialize_llc_perf(); if (ret) return ret; @@ -242,7 +242,7 @@ int cat_val(struct resctrl_val_param *param) /* Test runs until the callback setup() tells the test to stop. */ while (1) { - if (strcmp(resctrl_val, "cat") == 0) { + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { ret = param->setup(1, param); if (ret) { ret = 0; @@ -270,3 +270,45 @@ int cat_val(struct resctrl_val_param *param) return ret; } + +/* + * show_cache_info: show cache test result information + * @sum_llc_val: sum of LLC cache result data + * @no_of_bits: number of bits + * @cache_span: cache span in bytes for CMT or in lines for CAT + * @max_diff: max difference + * @max_diff_percent: max difference percentage + * @num_of_runs: number of runs + * @platform: show test information on this platform + * @cmt: CMT test or CAT test + * + * Return: 0 on success. non-zero on failure. + */ +int show_cache_info(unsigned long sum_llc_val, int no_of_bits, + unsigned long cache_span, unsigned long max_diff, + unsigned long max_diff_percent, unsigned long num_of_runs, + bool platform, bool cmt) +{ + unsigned long avg_llc_val = 0; + float diff_percent; + long avg_diff = 0; + int ret; + + avg_llc_val = sum_llc_val / (num_of_runs - 1); + avg_diff = (long)abs(cache_span - avg_llc_val); + diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100; + + ret = platform && abs((int)diff_percent) > max_diff_percent && + (cmt ? (abs(avg_diff) > max_diff) : true); + + ksft_print_msg("%s Check cache miss rate within %d%%\n", + ret ? "Fail:" : "Pass:", max_diff_percent); + + ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent)); + ksft_print_msg("Number of bits: %d\n", no_of_bits); + ksft_print_msg("Average LLC val: %lu\n", avg_llc_val); + ksft_print_msg("Cache span (%s): %lu\n", cmt ? "bytes" : "lines", + cache_span); + + return ret; +} diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 5da43767b973..cd4f68388e0f 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -17,10 +17,10 @@ #define MAX_DIFF_PERCENT 4 #define MAX_DIFF 1000000 -int count_of_bits; -char cbm_mask[256]; -unsigned long long_mask; -unsigned long cache_size; +static int count_of_bits; +static char cbm_mask[256]; +static unsigned long long_mask; +static unsigned long cache_size; /* * Change schemata. Write schemata to specified @@ -52,27 +52,6 @@ static int cat_setup(int num, ...) return ret; } -static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits, - unsigned long span) -{ - unsigned long allocated_cache_lines = span / 64; - unsigned long avg_llc_perf_miss = 0; - float diff_percent; - - avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1); - diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) / - allocated_cache_lines * 100; - - printf("%sok CAT: cache miss rate within %d%%\n", - !is_amd && abs((int)diff_percent) > MAX_DIFF_PERCENT ? - "not " : "", MAX_DIFF_PERCENT); - tests_run++; - printf("# Percent diff=%d\n", abs((int)diff_percent)); - printf("# Number of bits: %d\n", no_of_bits); - printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss); - printf("# Allocated cache lines: %lu\n", allocated_cache_lines); -} - static int check_results(struct resctrl_val_param *param) { char *token_array[8], temp[512]; @@ -80,7 +59,7 @@ static int check_results(struct resctrl_val_param *param) int runs = 0, no_of_bits = 0; FILE *fp; - printf("# Checking for pass/fail\n"); + ksft_print_msg("Checking for pass/fail\n"); fp = fopen(param->filename, "r"); if (!fp) { perror("# Cannot open file"); @@ -108,9 +87,9 @@ static int check_results(struct resctrl_val_param *param) fclose(fp); no_of_bits = count_bits(param->mask); - show_cache_info(sum_llc_perf_miss, no_of_bits, param->span); - - return 0; + return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64, + MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS, + !is_amd, false); } void cat_test_cleanup(void) @@ -132,11 +111,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) if (ret) return ret; - if (!validate_resctrl_feature_request("cat")) - return -1; - /* Get default cbm mask for L3/L2 cache */ - ret = get_cbm_mask(cache_type); + ret = get_cbm_mask(cache_type, cbm_mask); if (ret) return ret; @@ -146,15 +122,18 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) ret = get_cache_size(cpu_no, cache_type, &cache_size); if (ret) return ret; - printf("cache size :%lu\n", cache_size); + ksft_print_msg("Cache size :%lu\n", cache_size); /* Get max number of bits from default-cabm mask */ count_of_bits = count_bits(long_mask); - if (n < 1 || n > count_of_bits - 1) { - printf("Invalid input value for no_of_bits n!\n"); - printf("Please Enter value in range 1 to %d\n", - count_of_bits - 1); + if (!n) + n = count_of_bits / 2; + + if (n > count_of_bits - 1) { + ksft_print_msg("Invalid input value for no_of_bits n!\n"); + ksft_print_msg("Please enter value in range 1 to %d\n", + count_of_bits - 1); return -1; } @@ -164,7 +143,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) return -1; struct resctrl_val_param param = { - .resctrl_val = "cat", + .resctrl_val = CAT_STR, .cpu_no = cpu_no, .mum_resctrlfs = 0, .setup = cat_setup, diff --git a/tools/testing/selftests/resctrl/cqm_test.c b/tools/testing/selftests/resctrl/cmt_test.c index c8756152bd61..8968e36db99d 100644 --- a/tools/testing/selftests/resctrl/cqm_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Cache Monitoring Technology (CQM) test + * Cache Monitoring Technology (CMT) test * * Copyright (C) 2018 Intel Corporation * @@ -11,17 +11,17 @@ #include "resctrl.h" #include <unistd.h> -#define RESULT_FILE_NAME "result_cqm" +#define RESULT_FILE_NAME "result_cmt" #define NUM_OF_RUNS 5 #define MAX_DIFF 2000000 #define MAX_DIFF_PERCENT 15 -int count_of_bits; -char cbm_mask[256]; -unsigned long long_mask; -unsigned long cache_size; +static int count_of_bits; +static char cbm_mask[256]; +static unsigned long long_mask; +static unsigned long cache_size; -static int cqm_setup(int num, ...) +static int cmt_setup(int num, ...) { struct resctrl_val_param *p; va_list param; @@ -39,38 +39,6 @@ static int cqm_setup(int num, ...) return 0; } -static void show_cache_info(unsigned long sum_llc_occu_resc, int no_of_bits, - unsigned long span) -{ - unsigned long avg_llc_occu_resc = 0; - float diff_percent; - long avg_diff = 0; - bool res; - - avg_llc_occu_resc = sum_llc_occu_resc / (NUM_OF_RUNS - 1); - avg_diff = (long)abs(span - avg_llc_occu_resc); - - diff_percent = (((float)span - avg_llc_occu_resc) / span) * 100; - - if ((abs((int)diff_percent) <= MAX_DIFF_PERCENT) || - (abs(avg_diff) <= MAX_DIFF)) - res = true; - else - res = false; - - printf("%sok CQM: diff within %d, %d\%%\n", res ? "" : "not", - MAX_DIFF, (int)MAX_DIFF_PERCENT); - - printf("# diff: %ld\n", avg_diff); - printf("# percent diff=%d\n", abs((int)diff_percent)); - printf("# Results are displayed in (Bytes)\n"); - printf("# Number of bits: %d\n", no_of_bits); - printf("# Avg_llc_occu_resc: %lu\n", avg_llc_occu_resc); - printf("# llc_occu_exp (span): %lu\n", span); - - tests_run++; -} - static int check_results(struct resctrl_val_param *param, int no_of_bits) { char *token_array[8], temp[512]; @@ -78,7 +46,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) int runs = 0; FILE *fp; - printf("# checking for pass/fail\n"); + ksft_print_msg("Checking for pass/fail\n"); fp = fopen(param->filename, "r"); if (!fp) { perror("# Error in opening file\n"); @@ -86,7 +54,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) return errno; } - while (fgets(temp, 1024, fp)) { + while (fgets(temp, sizeof(temp), fp)) { char *token = strtok(temp, ":\t"); int fields = 0; @@ -101,17 +69,18 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) runs++; } fclose(fp); - show_cache_info(sum_llc_occu_resc, no_of_bits, param->span); - return 0; + return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span, + MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS, + true, true); } -void cqm_test_cleanup(void) +void cmt_test_cleanup(void) { remove(RESULT_FILE_NAME); } -int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) +int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) { int ret, mum_resctrlfs; @@ -122,10 +91,10 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) if (ret) return ret; - if (!validate_resctrl_feature_request("cqm")) + if (!validate_resctrl_feature_request(CMT_STR)) return -1; - ret = get_cbm_mask("L3"); + ret = get_cbm_mask("L3", cbm_mask); if (ret) return ret; @@ -134,18 +103,18 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) ret = get_cache_size(cpu_no, "L3", &cache_size); if (ret) return ret; - printf("cache size :%lu\n", cache_size); + ksft_print_msg("Cache size :%lu\n", cache_size); count_of_bits = count_bits(long_mask); if (n < 1 || n > count_of_bits) { - printf("Invalid input value for numbr_of_bits n!\n"); - printf("Please Enter value in range 1 to %d\n", count_of_bits); + ksft_print_msg("Invalid input value for numbr_of_bits n!\n"); + ksft_print_msg("Please enter value in range 1 to %d\n", count_of_bits); return -1; } struct resctrl_val_param param = { - .resctrl_val = "cqm", + .resctrl_val = CMT_STR, .ctrlgrp = "c1", .mongrp = "m1", .cpu_no = cpu_no, @@ -154,7 +123,7 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) .mask = ~(long_mask << n) & long_mask, .span = cache_size * n / count_of_bits, .num_of_runs = 0, - .setup = cqm_setup, + .setup = cmt_setup, }; if (strcmp(benchmark_cmd[0], "fill_buf") == 0) @@ -170,7 +139,7 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd) if (ret) return ret; - cqm_test_cleanup(); + cmt_test_cleanup(); return 0; } diff --git a/tools/testing/selftests/resctrl/config b/tools/testing/selftests/resctrl/config new file mode 100644 index 000000000000..8d9f2deb56ed --- /dev/null +++ b/tools/testing/selftests/resctrl/config @@ -0,0 +1,2 @@ +CONFIG_X86_CPU_RESCTRL=y +CONFIG_PROC_CPU_RESCTRL=y diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index 79c611c99a3d..51e5cf22632f 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -115,7 +115,7 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr, while (1) { ret = fill_one_span_read(start_ptr, end_ptr); - if (!strcmp(resctrl_val, "cat")) + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) break; } @@ -134,7 +134,7 @@ static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr, { while (1) { fill_one_span_write(start_ptr, end_ptr); - if (!strcmp(resctrl_val, "cat")) + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) break; } diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index 7bf8eaa6204b..1a1bdb6180cf 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -12,7 +12,7 @@ #define RESULT_FILE_NAME "result_mba" #define NUM_OF_RUNS 5 -#define MAX_DIFF 300 +#define MAX_DIFF_PERCENT 5 #define ALLOCATION_MAX 100 #define ALLOCATION_MIN 10 #define ALLOCATION_STEP 10 @@ -56,13 +56,14 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) int allocation, runs; bool failed = false; - printf("# Results are displayed in (MB)\n"); + ksft_print_msg("Results are displayed in (MB)\n"); /* Memory bandwidth from 100% down to 10% */ for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP; allocation++) { unsigned long avg_bw_imc, avg_bw_resc; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; - unsigned long avg_diff; + int avg_diff_per; + float avg_diff; /* * The first run is discarded due to inaccurate value from @@ -76,23 +77,26 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1); avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1); - avg_diff = labs((long)(avg_bw_resc - avg_bw_imc)); - - printf("%sok MBA schemata percentage %u smaller than %d %%\n", - avg_diff > MAX_DIFF ? "not " : "", - ALLOCATION_MAX - ALLOCATION_STEP * allocation, - MAX_DIFF); - tests_run++; - printf("# avg_diff: %lu\n", avg_diff); - printf("# avg_bw_imc: %lu\n", avg_bw_imc); - printf("# avg_bw_resc: %lu\n", avg_bw_resc); - if (avg_diff > MAX_DIFF) + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; + avg_diff_per = (int)(avg_diff * 100); + + ksft_print_msg("%s Check MBA diff within %d%% for schemata %u\n", + avg_diff_per > MAX_DIFF_PERCENT ? + "Fail:" : "Pass:", + MAX_DIFF_PERCENT, + ALLOCATION_MAX - ALLOCATION_STEP * allocation); + + ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); + ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); + ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); + if (avg_diff_per > MAX_DIFF_PERCENT) failed = true; } - printf("%sok schemata change using MBA%s\n", failed ? "not " : "", - failed ? " # at least one test failed" : ""); - tests_run++; + ksft_print_msg("%s Check schemata change using MBA\n", + failed ? "Fail:" : "Pass:"); + if (failed) + ksft_print_msg("At least one test failed\n"); } static int check_results(void) @@ -141,7 +145,7 @@ void mba_test_cleanup(void) int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) { struct resctrl_val_param param = { - .resctrl_val = "mba", + .resctrl_val = MBA_STR, .ctrlgrp = "c1", .mongrp = "m1", .cpu_no = cpu_no, @@ -154,9 +158,6 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) remove(RESULT_FILE_NAME); - if (!validate_resctrl_feature_request("mba")) - return -1; - ret = resctrl_val(benchmark_cmd, ¶m); if (ret) return ret; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 4700f7453f81..8392e5c55ed0 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -11,16 +11,16 @@ #include "resctrl.h" #define RESULT_FILE_NAME "result_mbm" -#define MAX_DIFF 300 +#define MAX_DIFF_PERCENT 5 #define NUM_OF_RUNS 5 -static void +static int show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span) { unsigned long avg_bw_imc = 0, avg_bw_resc = 0; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; - long avg_diff = 0; - int runs; + int runs, ret, avg_diff_per; + float avg_diff = 0; /* * Discard the first value which is inaccurate due to monitoring setup @@ -33,15 +33,18 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span) avg_bw_imc = sum_bw_imc / 4; avg_bw_resc = sum_bw_resc / 4; - avg_diff = avg_bw_resc - avg_bw_imc; - - printf("%sok MBM: diff within %d%%\n", - labs(avg_diff) > MAX_DIFF ? "not " : "", MAX_DIFF); - tests_run++; - printf("# avg_diff: %lu\n", labs(avg_diff)); - printf("# Span (MB): %d\n", span); - printf("# avg_bw_imc: %lu\n", avg_bw_imc); - printf("# avg_bw_resc: %lu\n", avg_bw_resc); + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; + avg_diff_per = (int)(avg_diff * 100); + + ret = avg_diff_per > MAX_DIFF_PERCENT; + ksft_print_msg("%s Check MBM diff within %d%%\n", + ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT); + ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); + ksft_print_msg("Span (MB): %d\n", span); + ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); + ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); + + return ret; } static int check_results(int span) @@ -49,10 +52,10 @@ static int check_results(int span) unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS]; char temp[1024], *token_array[8]; char output[] = RESULT_FILE_NAME; - int runs; + int runs, ret; FILE *fp; - printf("# Checking for pass/fail\n"); + ksft_print_msg("Checking for pass/fail\n"); fp = fopen(output, "r"); if (!fp) { @@ -76,11 +79,11 @@ static int check_results(int span) runs++; } - show_bw_info(bw_imc, bw_resc, span); + ret = show_bw_info(bw_imc, bw_resc, span); fclose(fp); - return 0; + return ret; } static int mbm_setup(int num, ...) @@ -114,7 +117,7 @@ void mbm_test_cleanup(void) int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd) { struct resctrl_val_param param = { - .resctrl_val = "mbm", + .resctrl_val = MBM_STR, .ctrlgrp = "c1", .mongrp = "m1", .span = span, @@ -128,9 +131,6 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd) remove(RESULT_FILE_NAME); - if (!validate_resctrl_feature_request("mbm")) - return -1; - ret = resctrl_val(benchmark_cmd, ¶m); if (ret) return ret; diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 39bf59c6b9c5..1ad10c47e31d 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -23,11 +23,16 @@ #include <sys/eventfd.h> #include <asm/unistd.h> #include <linux/perf_event.h> +#include "../kselftest.h" #define MB (1024 * 1024) #define RESCTRL_PATH "/sys/fs/resctrl" #define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" #define CBM_MASK_PATH "/sys/fs/resctrl/info" +#define L3_PATH "/sys/fs/resctrl/info/L3" +#define MB_PATH "/sys/fs/resctrl/info/MB" +#define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON" +#define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features" #define PARENT_EXIT(err_msg) \ do { \ @@ -62,11 +67,15 @@ struct resctrl_val_param { int (*setup)(int num, ...); }; -pid_t bm_pid, ppid; -int tests_run; +#define MBM_STR "mbm" +#define MBA_STR "mba" +#define CMT_STR "cmt" +#define CAT_STR "cat" -char llc_occup_path[1024]; -bool is_amd; +extern pid_t bm_pid, ppid; + +extern char llc_occup_path[1024]; +extern bool is_amd; bool check_resctrlfs_support(void); int filter_dmesg(void); @@ -74,7 +83,7 @@ int remount_resctrlfs(bool mum_resctrlfs); int get_resource_id(int cpu_no, int *resource_id); int umount_resctrlfs(void); int validate_bw_report_request(char *bw_report); -bool validate_resctrl_feature_request(char *resctrl_val); +bool validate_resctrl_feature_request(const char *resctrl_val); char *fgrep(FILE *inf, const char *str); int taskset_benchmark(pid_t bm_pid, int cpu_no); void run_benchmark(int signum, siginfo_t *info, void *ucontext); @@ -92,16 +101,20 @@ void tests_cleanup(void); void mbm_test_cleanup(void); int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd); void mba_test_cleanup(void); -int get_cbm_mask(char *cache_type); +int get_cbm_mask(char *cache_type, char *cbm_mask); int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); void ctrlc_handler(int signum, siginfo_t *info, void *ptr); int cat_val(struct resctrl_val_param *param); void cat_test_cleanup(void); int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type); -int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd); +int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd); unsigned int count_bits(unsigned long n); -void cqm_test_cleanup(void); +void cmt_test_cleanup(void); int get_core_sibling(int cpu_no); int measure_cache_vals(struct resctrl_val_param *param, int bm_pid); +int show_cache_info(unsigned long sum_llc_val, int no_of_bits, + unsigned long cache_span, unsigned long max_diff, + unsigned long max_diff_percent, unsigned long num_of_runs, + bool platform, bool cmt); #endif /* RESCTRL_H */ diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 425cc85ac883..f51b5fc066a3 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -37,10 +37,10 @@ void detect_amd(void) static void cmd_help(void) { printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n"); - printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM"); - printf("\t default benchmark is builtin fill_buf\n"); + printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT\n"); + printf("\t default benchmark is builtin fill_buf\n"); printf("\t-t test list: run tests specified in the test list, "); - printf("e.g. -t mbm, mba, cqm, cat\n"); + printf("e.g. -t mbm, mba, cmt, cat\n"); printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n"); printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n"); printf("\t-h: help\n"); @@ -50,17 +50,88 @@ void tests_cleanup(void) { mbm_test_cleanup(); mba_test_cleanup(); - cqm_test_cleanup(); + cmt_test_cleanup(); + cat_test_cleanup(); +} + +static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span, + int cpu_no, char *bw_report) +{ + int res; + + ksft_print_msg("Starting MBM BW change ...\n"); + + if (!validate_resctrl_feature_request(MBM_STR)) { + ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n"); + return; + } + + if (!has_ben) + sprintf(benchmark_cmd[5], "%s", MBA_STR); + res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); + ksft_test_result(!res, "MBM: bw change\n"); + mbm_test_cleanup(); +} + +static void run_mba_test(bool has_ben, char **benchmark_cmd, int span, + int cpu_no, char *bw_report) +{ + int res; + + ksft_print_msg("Starting MBA Schemata change ...\n"); + + if (!validate_resctrl_feature_request(MBA_STR)) { + ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n"); + return; + } + + if (!has_ben) + sprintf(benchmark_cmd[1], "%d", span); + res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd); + ksft_test_result(!res, "MBA: schemata change\n"); + mba_test_cleanup(); +} + +static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no) +{ + int res; + + ksft_print_msg("Starting CMT test ...\n"); + if (!validate_resctrl_feature_request(CMT_STR)) { + ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n"); + return; + } + + if (!has_ben) + sprintf(benchmark_cmd[5], "%s", CMT_STR); + res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd); + ksft_test_result(!res, "CMT: test\n"); + cmt_test_cleanup(); +} + +static void run_cat_test(int cpu_no, int no_of_bits) +{ + int res; + + ksft_print_msg("Starting CAT test ...\n"); + + if (!validate_resctrl_feature_request(CAT_STR)) { + ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n"); + return; + } + + res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); + ksft_test_result(!res, "CAT: test\n"); cat_test_cleanup(); } int main(int argc, char **argv) { - bool has_ben = false, mbm_test = true, mba_test = true, cqm_test = true; - int res, c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 5; + bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true; + int c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 0; char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; - int ben_ind, ben_count; + int ben_ind, ben_count, tests = 0; bool cat_test = true; for (i = 0; i < argc; i++) { @@ -73,7 +144,7 @@ int main(int argc, char **argv) } } - while ((c = getopt(argc_new, argv, "ht:b:")) != -1) { + while ((c = getopt(argc_new, argv, "ht:b:n:p:")) != -1) { char *token; switch (c) { @@ -82,17 +153,21 @@ int main(int argc, char **argv) mbm_test = false; mba_test = false; - cqm_test = false; + cmt_test = false; cat_test = false; while (token) { - if (!strcmp(token, "mbm")) { + if (!strncmp(token, MBM_STR, sizeof(MBM_STR))) { mbm_test = true; - } else if (!strcmp(token, "mba")) { + tests++; + } else if (!strncmp(token, MBA_STR, sizeof(MBA_STR))) { mba_test = true; - } else if (!strcmp(token, "cqm")) { - cqm_test = true; - } else if (!strcmp(token, "cat")) { + tests++; + } else if (!strncmp(token, CMT_STR, sizeof(CMT_STR))) { + cmt_test = true; + tests++; + } else if (!strncmp(token, CAT_STR, sizeof(CAT_STR))) { cat_test = true; + tests++; } else { printf("invalid argument\n"); @@ -106,6 +181,10 @@ int main(int argc, char **argv) break; case 'n': no_of_bits = atoi(optarg); + if (no_of_bits <= 0) { + printf("Bail out! invalid argument for no_of_bits\n"); + return -1; + } break; case 'h': cmd_help(); @@ -118,7 +197,7 @@ int main(int argc, char **argv) } } - printf("TAP version 13\n"); + ksft_print_header(); /* * Typically we need root privileges, because: @@ -126,7 +205,7 @@ int main(int argc, char **argv) * 2. We execute perf commands */ if (geteuid() != 0) - printf("# WARNING: not running as root, tests may fail.\n"); + return ksft_exit_fail_msg("Not running as root, abort testing.\n"); /* Detect AMD vendor */ detect_amd(); @@ -155,48 +234,26 @@ int main(int argc, char **argv) sprintf(bw_report, "reads"); sprintf(bm_type, "fill_buf"); - check_resctrlfs_support(); + if (!check_resctrlfs_support()) + return ksft_exit_fail_msg("resctrl FS does not exist\n"); + filter_dmesg(); - if (!is_amd && mbm_test) { - printf("# Starting MBM BW change ...\n"); - if (!has_ben) - sprintf(benchmark_cmd[5], "%s", "mba"); - res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); - printf("%sok MBM: bw change\n", res ? "not " : ""); - mbm_test_cleanup(); - tests_run++; - } + ksft_set_plan(tests ? : 4); - if (!is_amd && mba_test) { - printf("# Starting MBA Schemata change ...\n"); - if (!has_ben) - sprintf(benchmark_cmd[1], "%d", span); - res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd); - printf("%sok MBA: schemata change\n", res ? "not " : ""); - mba_test_cleanup(); - tests_run++; - } + if (!is_amd && mbm_test) + run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report); - if (cqm_test) { - printf("# Starting CQM test ...\n"); - if (!has_ben) - sprintf(benchmark_cmd[5], "%s", "cqm"); - res = cqm_resctrl_val(cpu_no, no_of_bits, benchmark_cmd); - printf("%sok CQM: test\n", res ? "not " : ""); - cqm_test_cleanup(); - tests_run++; - } + if (!is_amd && mba_test) + run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report); - if (cat_test) { - printf("# Starting CAT test ...\n"); - res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); - printf("%sok CAT: test\n", res ? "not " : ""); - tests_run++; - cat_test_cleanup(); - } + if (cmt_test) + run_cmt_test(has_ben, benchmark_cmd, cpu_no); + + if (cat_test) + run_cat_test(cpu_no, no_of_bits); - printf("1..%d\n", tests_run); + umount_resctrlfs(); - return 0; + return ksft_exit_pass(); } diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 520fea3606d1..95224345c78e 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -221,8 +221,8 @@ static int read_from_imc_dir(char *imc_dir, int count) */ static int num_of_imcs(void) { + char imc_dir[512], *temp; unsigned int count = 0; - char imc_dir[512]; struct dirent *ep; int ret; DIR *dp; @@ -230,7 +230,25 @@ static int num_of_imcs(void) dp = opendir(DYN_PMU_PATH); if (dp) { while ((ep = readdir(dp))) { - if (strstr(ep->d_name, UNCORE_IMC)) { + temp = strstr(ep->d_name, UNCORE_IMC); + if (!temp) + continue; + + /* + * imc counters are named as "uncore_imc_<n>", hence + * increment the pointer to point to <n>. Note that + * sizeof(UNCORE_IMC) would count for null character as + * well and hence the last underscore character in + * uncore_imc'_' need not be counted. + */ + temp = temp + sizeof(UNCORE_IMC); + + /* + * Some directories under "DYN_PMU_PATH" could have + * names like "uncore_imc_free_running", hence, check if + * first character is a numerical digit or not. + */ + if (temp[0] >= '0' && temp[0] <= '9') { sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH, ep->d_name); ret = read_from_imc_dir(imc_dir, count); @@ -282,9 +300,9 @@ static int initialize_mem_bw_imc(void) * Memory B/W utilized by a process on a socket can be calculated using * iMC counters. Perf events are used to read these counters. * - * Return: >= 0 on success. < 0 on failure. + * Return: = 0 on success. < 0 on failure. */ -static float get_mem_bw_imc(int cpu_no, char *bw_report) +static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) { float reads, writes, of_mul_read, of_mul_write; int imc, j, ret; @@ -355,13 +373,18 @@ static float get_mem_bw_imc(int cpu_no, char *bw_report) close(imc_counters_config[imc][WRITE].fd); } - if (strcmp(bw_report, "reads") == 0) - return reads; + if (strcmp(bw_report, "reads") == 0) { + *bw_imc = reads; + return 0; + } - if (strcmp(bw_report, "writes") == 0) - return writes; + if (strcmp(bw_report, "writes") == 0) { + *bw_imc = writes; + return 0; + } - return (reads + writes); + *bw_imc = reads + writes; + return 0; } void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id) @@ -397,10 +420,10 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, return; } - if (strcmp(resctrl_val, "mbm") == 0) + if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) set_mbm_path(ctrlgrp, mongrp, resource_id); - if ((strcmp(resctrl_val, "mba") == 0)) { + if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { if (ctrlgrp) sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, ctrlgrp, resource_id); @@ -420,9 +443,8 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, * 1. If con_mon grp is given, then read from it * 2. If con_mon grp is not given, then read from root con_mon grp */ -static unsigned long get_mem_bw_resctrl(void) +static int get_mem_bw_resctrl(unsigned long *mbm_total) { - unsigned long mbm_total = 0; FILE *fp; fp = fopen(mbm_total_path, "r"); @@ -431,7 +453,7 @@ static unsigned long get_mem_bw_resctrl(void) return -1; } - if (fscanf(fp, "%lu", &mbm_total) <= 0) { + if (fscanf(fp, "%lu", mbm_total) <= 0) { perror("Could not get mbm local bytes"); fclose(fp); @@ -439,7 +461,7 @@ static unsigned long get_mem_bw_resctrl(void) } fclose(fp); - return mbm_total; + return 0; } pid_t bm_pid, ppid; @@ -449,7 +471,7 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr) kill(bm_pid, SIGKILL); umount_resctrlfs(); tests_cleanup(); - printf("Ending\n\n"); + ksft_print_msg("Ending\n\n"); exit(EXIT_SUCCESS); } @@ -492,7 +514,7 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return 0; } -static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num) +static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num) { if (strlen(ctrlgrp) && strlen(mongrp)) sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, @@ -512,7 +534,7 @@ static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num) * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) * @cpu_no: CPU number that the benchmark PID is binded to - * @resctrl_val: Resctrl feature (Eg: cat, cqm.. etc) + * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc) */ static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, int cpu_no, char *resctrl_val) @@ -524,14 +546,15 @@ static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, return; } - if (strcmp(resctrl_val, "cqm") == 0) - set_cqm_path(ctrlgrp, mongrp, resource_id); + if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) + set_cmt_path(ctrlgrp, mongrp, resource_id); } static int measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) { - unsigned long bw_imc, bw_resc, bw_resc_end; + unsigned long bw_resc, bw_resc_end; + float bw_imc; int ret; /* @@ -541,13 +564,13 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) * Compare the two values to validate resctrl value. * It takes 1sec to measure the data. */ - bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report); - if (bw_imc <= 0) - return bw_imc; + ret = get_mem_bw_imc(param->cpu_no, param->bw_report, &bw_imc); + if (ret < 0) + return ret; - bw_resc_end = get_mem_bw_resctrl(); - if (bw_resc_end <= 0) - return bw_resc_end; + ret = get_mem_bw_resctrl(&bw_resc_end); + if (ret < 0) + return ret; bw_resc = (bw_resc_end - *bw_resc_start) / MB; ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); @@ -579,8 +602,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) if (strcmp(param->filename, "") == 0) sprintf(param->filename, "stdio"); - if ((strcmp(resctrl_val, "mba")) == 0 || - (strcmp(resctrl_val, "mbm")) == 0) { + if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) || + !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { ret = validate_bw_report_request(param->bw_report); if (ret) return ret; @@ -645,7 +668,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) PARENT_EXIT("Child is done"); } - printf("# benchmark PID: %d\n", bm_pid); + ksft_print_msg("Benchmark PID: %d\n", bm_pid); /* * Register CTRL-C handler for parent, as it has to kill benchmark @@ -674,15 +697,15 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) if (ret) goto out; - if ((strcmp(resctrl_val, "mbm") == 0) || - (strcmp(resctrl_val, "mba") == 0)) { + if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || + !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { ret = initialize_mem_bw_imc(); if (ret) goto out; initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, param->cpu_no, resctrl_val); - } else if (strcmp(resctrl_val, "cqm") == 0) + } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp, param->cpu_no, resctrl_val); @@ -710,8 +733,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) /* Test runs until the callback setup() tells the test to stop. */ while (1) { - if ((strcmp(resctrl_val, "mbm") == 0) || - (strcmp(resctrl_val, "mba") == 0)) { + if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || + !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { ret = param->setup(1, param); if (ret) { ret = 0; @@ -721,7 +744,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) ret = measure_vals(param, &bw_resc_start); if (ret) break; - } else if (strcmp(resctrl_val, "cqm") == 0) { + } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { ret = param->setup(1, param); if (ret) { ret = 0; diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 19c0ec4045a4..5f5a166ade60 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -10,8 +10,6 @@ */ #include "resctrl.h" -int tests_run; - static int find_resctrl_mount(char *buffer) { FILE *mounts; @@ -49,8 +47,6 @@ static int find_resctrl_mount(char *buffer) return -ENOENT; } -char cbm_mask[256]; - /* * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl * @mum_resctrlfs: Should the resctrl FS be remounted? @@ -70,28 +66,25 @@ int remount_resctrlfs(bool mum_resctrlfs) if (ret) strcpy(mountpoint, RESCTRL_PATH); - if (!ret && mum_resctrlfs && umount(mountpoint)) { - printf("not ok unmounting \"%s\"\n", mountpoint); - perror("# umount"); - tests_run++; - } + if (!ret && mum_resctrlfs && umount(mountpoint)) + ksft_print_msg("Fail: unmounting \"%s\"\n", mountpoint); if (!ret && !mum_resctrlfs) return 0; + ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH); ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL); - printf("%sok mounting resctrl to \"%s\"\n", ret ? "not " : "", - RESCTRL_PATH); if (ret) perror("# mount"); - tests_run++; - return ret; } int umount_resctrlfs(void) { + if (find_resctrl_mount(NULL)) + return 0; + if (umount(RESCTRL_PATH)) { perror("# Unable to umount resctrl"); @@ -205,16 +198,18 @@ int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size) /* * get_cbm_mask - Get cbm mask for given cache * @cache_type: Cache level L2/L3 - * - * Mask is stored in cbm_mask which is global variable. + * @cbm_mask: cbm_mask returned as a string * * Return: = 0 on success, < 0 on failure. */ -int get_cbm_mask(char *cache_type) +int get_cbm_mask(char *cache_type, char *cbm_mask) { char cbm_mask_path[1024]; FILE *fp; + if (!cbm_mask) + return -1; + sprintf(cbm_mask_path, "%s/%s/cbm_mask", CBM_MASK_PATH, cache_type); fp = fopen(cbm_mask_path, "r"); @@ -268,7 +263,7 @@ int get_core_sibling(int cpu_no) while (token) { sibling_cpu_no = atoi(token); /* Skipping core 0 as we don't want to run test on core 0 */ - if (sibling_cpu_no != 0) + if (sibling_cpu_no != 0 && sibling_cpu_no != cpu_no) break; token = strtok(NULL, "-,"); } @@ -334,7 +329,7 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext) operation = atoi(benchmark_cmd[4]); sprintf(resctrl_val, "%s", benchmark_cmd[5]); - if (strcmp(resctrl_val, "cqm") != 0) + if (strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) buffer_span = span * MB; else buffer_span = span; @@ -458,9 +453,9 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, if (ret) goto out; - /* Create mon grp and write pid into it for "mbm" and "cqm" test */ - if ((strcmp(resctrl_val, "cqm") == 0) || - (strcmp(resctrl_val, "mbm") == 0)) { + /* Create mon grp and write pid into it for "mbm" and "cmt" test */ + if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) || + !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { if (strlen(mongrp)) { sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); @@ -477,13 +472,10 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, } out: - printf("%sok writing benchmark parameters to resctrl FS\n", - ret ? "not " : ""); + ksft_print_msg("Writing benchmark parameters to resctrl FS\n"); if (ret) perror("# writing to resctrlfs"); - tests_run++; - return ret; } @@ -505,13 +497,13 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) int resource_id, ret = 0; FILE *fp; - if ((strcmp(resctrl_val, "mba") != 0) && - (strcmp(resctrl_val, "cat") != 0) && - (strcmp(resctrl_val, "cqm") != 0)) + if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) && + strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) && + strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) return -ENOENT; if (!schemata) { - printf("# Skipping empty schemata update\n"); + ksft_print_msg("Skipping empty schemata update\n"); return -1; } @@ -528,9 +520,10 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) else sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); - if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm")) + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) || + !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata); - if (strcmp(resctrl_val, "mba") == 0) + if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata); fp = fopen(controlgroup, "w"); @@ -551,10 +544,9 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) fclose(fp); out: - printf("%sok Write schema \"%s\" to resctrl FS%s%s\n", - ret ? "not " : "", schema, ret ? " # " : "", - ret ? reason : ""); - tests_run++; + ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n", + schema, ret ? " # " : "", + ret ? reason : ""); return ret; } @@ -578,18 +570,20 @@ bool check_resctrlfs_support(void) fclose(inf); - printf("%sok kernel supports resctrl filesystem\n", ret ? "" : "not "); - tests_run++; + ksft_print_msg("%s Check kernel supports resctrl filesystem\n", + ret ? "Pass:" : "Fail:"); + + if (!ret) + return ret; dp = opendir(RESCTRL_PATH); - printf("%sok resctrl mountpoint \"%s\" exists\n", - dp ? "" : "not ", RESCTRL_PATH); + ksft_print_msg("%s Check resctrl mountpoint \"%s\" exists\n", + dp ? "Pass:" : "Fail:", RESCTRL_PATH); if (dp) closedir(dp); - tests_run++; - printf("# resctrl filesystem %s mounted\n", - find_resctrl_mount(NULL) ? "not" : "is"); + ksft_print_msg("resctrl filesystem %s mounted\n", + find_resctrl_mount(NULL) ? "not" : "is"); return ret; } @@ -615,26 +609,56 @@ char *fgrep(FILE *inf, const char *str) * validate_resctrl_feature_request - Check if requested feature is valid. * @resctrl_val: Requested feature * - * Return: 0 on success, non-zero on failure + * Return: True if the feature is supported, else false */ -bool validate_resctrl_feature_request(char *resctrl_val) +bool validate_resctrl_feature_request(const char *resctrl_val) { - FILE *inf = fopen("/proc/cpuinfo", "r"); + struct stat statbuf; bool found = false; char *res; + FILE *inf; - if (!inf) + if (!resctrl_val) return false; - res = fgrep(inf, "flags"); - - if (res) { - char *s = strchr(res, ':'); + if (remount_resctrlfs(false)) + return false; - found = s && !strstr(s, resctrl_val); - free(res); + if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { + if (!stat(L3_PATH, &statbuf)) + return true; + } else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { + if (!stat(MB_PATH, &statbuf)) + return true; + } else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || + !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { + if (!stat(L3_MON_PATH, &statbuf)) { + inf = fopen(L3_MON_FEATURES_PATH, "r"); + if (!inf) + return false; + + if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { + res = fgrep(inf, "llc_occupancy"); + if (res) { + found = true; + free(res); + } + } + + if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { + res = fgrep(inf, "mbm_total_bytes"); + if (res) { + free(res); + res = fgrep(inf, "mbm_local_bytes"); + if (res) { + found = true; + free(res); + } + } + } + fclose(inf); + } } - fclose(inf); return found; } @@ -671,9 +695,9 @@ int filter_dmesg(void) while (fgets(line, 1024, fp)) { if (strstr(line, "intel_rdt:")) - printf("# dmesg: %s", line); + ksft_print_msg("dmesg: %s", line); if (strstr(line, "resctrl:")) - printf("# dmesg: %s", line); + ksft_print_msg("dmesg: %s", line); } fclose(fp); waitpid(pid, NULL, 0); diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index 592c1ccf4576..0bd73428d2f3 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -14,7 +14,7 @@ #define __aligned(x) __attribute__((__aligned__(x))) #define __packed __attribute__((packed)) -#include "../../../../arch/x86/kernel/cpu/sgx/arch.h" +#include "../../../../arch/x86/include/asm/sgx.h" #include "../../../../arch/x86/include/asm/enclu.h" #include "../../../../arch/x86/include/uapi/asm/sgx.h" diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 9d43b75aaa55..f441ac34b4d4 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -45,19 +45,19 @@ static bool encl_map_bin(const char *path, struct encl *encl) fd = open(path, O_RDONLY); if (fd == -1) { - perror("open()"); + perror("enclave executable open()"); return false; } ret = stat(path, &sb); if (ret) { - perror("stat()"); + perror("enclave executable stat()"); goto err; } bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (bin == MAP_FAILED) { - perror("mmap()"); + perror("enclave executable mmap()"); goto err; } @@ -90,8 +90,7 @@ static bool encl_ioc_create(struct encl *encl) ioc.src = (unsigned long)secs; rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc); if (rc) { - fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n", - errno); + perror("SGX_IOC_ENCLAVE_CREATE failed"); munmap((void *)secs->base, encl->encl_size); return false; } @@ -116,31 +115,72 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc); if (rc < 0) { - fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n", - errno); + perror("SGX_IOC_ENCLAVE_ADD_PAGES failed"); return false; } return true; } + + bool encl_load(const char *path, struct encl *encl) { + const char device_path[] = "/dev/sgx_enclave"; Elf64_Phdr *phdr_tbl; off_t src_offset; Elf64_Ehdr *ehdr; + struct stat sb; + void *ptr; int i, j; int ret; + int fd = -1; memset(encl, 0, sizeof(*encl)); - ret = open("/dev/sgx_enclave", O_RDWR); - if (ret < 0) { - fprintf(stderr, "Unable to open /dev/sgx_enclave\n"); + fd = open(device_path, O_RDWR); + if (fd < 0) { + perror("Unable to open /dev/sgx_enclave"); + goto err; + } + + ret = stat(device_path, &sb); + if (ret) { + perror("device file stat()"); + goto err; + } + + /* + * This just checks if the /dev file has these permission + * bits set. It does not check that the current user is + * the owner or in the owning group. + */ + if (!(sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) { + fprintf(stderr, "no execute permissions on device file %s\n", device_path); + goto err; + } + + ptr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0); + if (ptr == (void *)-1) { + perror("mmap for read"); + goto err; + } + munmap(ptr, PAGE_SIZE); + +#define ERR_MSG \ +"mmap() succeeded for PROT_READ, but failed for PROT_EXEC.\n" \ +" Check that current user has execute permissions on %s and \n" \ +" that /dev does not have noexec set: mount | grep \"/dev .*noexec\"\n" \ +" If so, remount it executable: mount -o remount,exec /dev\n\n" + + ptr = mmap(NULL, PAGE_SIZE, PROT_EXEC, MAP_SHARED, fd, 0); + if (ptr == (void *)-1) { + fprintf(stderr, ERR_MSG, device_path); goto err; } + munmap(ptr, PAGE_SIZE); - encl->fd = ret; + encl->fd = fd; if (!encl_map_bin(path, encl)) goto err; @@ -217,6 +257,8 @@ bool encl_load(const char *path, struct encl *encl) return true; err: + if (fd != -1) + close(fd); encl_delete(encl); return false; } @@ -229,7 +271,7 @@ static bool encl_map_area(struct encl *encl) area = mmap(NULL, encl_size * 2, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (area == MAP_FAILED) { - perror("mmap"); + perror("reservation mmap()"); return false; } @@ -268,8 +310,7 @@ bool encl_build(struct encl *encl) ioc.sigstruct = (uint64_t)&encl->sigstruct; ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc); if (ret) { - fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n", - errno); + perror("SGX_IOC_ENCLAVE_INIT failed"); return false; } diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 724cec700926..d304a4044eb9 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -15,6 +15,7 @@ #include <sys/stat.h> #include <sys/time.h> #include <sys/types.h> +#include <sys/auxv.h> #include "defines.h" #include "main.h" #include "../kselftest.h" @@ -28,24 +29,6 @@ struct vdso_symtab { Elf64_Word *elf_hashtab; }; -static void *vdso_get_base_addr(char *envp[]) -{ - Elf64_auxv_t *auxv; - int i; - - for (i = 0; envp[i]; i++) - ; - - auxv = (Elf64_auxv_t *)&envp[i + 1]; - - for (i = 0; auxv[i].a_type != AT_NULL; i++) { - if (auxv[i].a_type == AT_SYSINFO_EHDR) - return (void *)auxv[i].a_un.a_val; - } - - return NULL; -} - static Elf64_Dyn *vdso_get_dyntab(void *addr) { Elf64_Ehdr *ehdr = addr; @@ -162,7 +145,7 @@ static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r return 0; } -int main(int argc, char *argv[], char *envp[]) +int main(int argc, char *argv[]) { struct sgx_enclave_run run; struct vdso_symtab symtab; @@ -195,7 +178,7 @@ int main(int argc, char *argv[], char *envp[]) addr = mmap((void *)encl.encl_base + seg->offset, seg->size, seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0); if (addr == MAP_FAILED) { - fprintf(stderr, "mmap() failed, errno=%d.\n", errno); + perror("mmap() segment failed"); exit(KSFT_FAIL); } } @@ -203,7 +186,8 @@ int main(int argc, char *argv[], char *envp[]) memset(&run, 0, sizeof(run)); run.tcs = encl.encl_base; - addr = vdso_get_base_addr(envp); + /* Get vDSO base address */ + addr = (void *)getauxval(AT_SYSINFO_EHDR); if (!addr) goto err; diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index b8268da5adaa..8e45792703ed 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -764,5 +764,53 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "cdd7", + "name": "Add valid police action with packets per second rate limit", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "f5bc", + "name": "Add invalid police action with both bps and pps", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k pkts_rate 1000 pkts_burst 200 index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json index 8e8c1ae12260..e0c5f060ccb9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json @@ -24,6 +24,30 @@ ] }, { + "id": "4297", + "name": "Add simple action with change command", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions change action simple sdata \"Not changed\" index 60", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <Not changed>.*index 60 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { "id": "6d4c", "name": "Add simple action with duplicate index", "category": [ @@ -151,5 +175,64 @@ "teardown": [ "$TC actions flush action simple" ] + }, + { + "id": "8d07", + "name": "Verify cleanup of failed actions batch add", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"2\" index 2", + [ + "$TC actions add action simple sdata \"1\" index 1 action simple sdata \"2\" index 2", + 255 + ], + "$TC actions flush action simple" + ], + "cmdUnderTest": "$TC actions add action simple sdata \"2\" index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <2>.*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] + }, + { + "id": "a68a", + "name": "Verify cleanup of failed actions batch change", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + [ + "$TC actions change action simple sdata \"1\" index 1 action simple sdata \"2\" goto chain 42 index 2", + 255 + ], + "$TC actions flush action simple" + ], + "cmdUnderTest": "$TC actions add action simple sdata \"1\" index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple <1>.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] } ] diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c index 7bf841a3967b..6b13dc277724 100644 --- a/tools/testing/selftests/timens/gettime_perf.c +++ b/tools/testing/selftests/timens/gettime_perf.c @@ -25,6 +25,12 @@ static void fill_function_pointers(void) if (!vdso) vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso32.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso64.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); if (!vdso) { pr_err("[WARN]\tfailed to find vDSO\n"); return; @@ -32,6 +38,8 @@ static void fill_function_pointers(void) vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); if (!vdso_clock_gettime) + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__kernel_clock_gettime"); + if (!vdso_clock_gettime) pr_err("Warning: failed to find clock_gettime in vDSO\n"); } diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index bfc974b4572d..ef8eb3604595 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -3,7 +3,7 @@ * (C) Copyright IBM 2012 * Licensed under the GPLv2 * - * NOTE: This is a meta-test which quickly changes the clocksourc and + * NOTE: This is a meta-test which quickly changes the clocksource and * then uses other tests to detect problems. Thus this test requires * that the inconsistency-check and nanosleep tests be present in the * same directory it is run from. @@ -134,7 +134,7 @@ int main(int argv, char **argc) return -1; } - /* Check everything is sane before we start switching asyncrhonously */ + /* Check everything is sane before we start switching asynchronously */ for (i = 0; i < count; i++) { printf("Validating clocksource %s\n", clocksource_list[i]); if (change_clocksource(clocksource_list[i])) { diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c index 19e46ed5dfb5..23eb398c8140 100644 --- a/tools/testing/selftests/timers/leap-a-day.c +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -5,7 +5,7 @@ * Licensed under the GPLv2 * * This test signals the kernel to insert a leap second - * every day at midnight GMT. This allows for stessing the + * every day at midnight GMT. This allows for stressing the * kernel's leap-second behavior, as well as how well applications * handle the leap-second discontinuity. * diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c index dc80728ed191..f70802c5dd0d 100644 --- a/tools/testing/selftests/timers/leapcrash.c +++ b/tools/testing/selftests/timers/leapcrash.c @@ -4,10 +4,10 @@ * (C) Copyright 2013, 2015 Linaro Limited * Licensed under the GPL * - * This test demonstrates leapsecond deadlock that is possibe + * This test demonstrates leapsecond deadlock that is possible * on kernels from 2.6.26 to 3.3. * - * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA + * WARNING: THIS WILL LIKELY HARD HANG SYSTEMS AND MAY LOSE DATA * RUN AT YOUR OWN RISK! * To build: * $ gcc leapcrash.c -o leapcrash -lrt diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c index cf3e48919874..80aed4bf06fb 100644 --- a/tools/testing/selftests/timers/threadtest.c +++ b/tools/testing/selftests/timers/threadtest.c @@ -76,7 +76,7 @@ void checklist(struct timespec *list, int size) /* The shared thread shares a global list * that each thread fills while holding the lock. - * This stresses clock syncronization across cpus. + * This stresses clock synchronization across cpus. */ void *shared_thread(void *arg) { diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 9a35c3f6a557..1f651e85ed60 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -22,3 +22,4 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests local_config.* +split_huge_page_test diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d42115e4284d..73e1cc96d7c2 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -42,6 +42,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += split_huge_page_test ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) @@ -101,7 +102,7 @@ endef ifeq ($(CAN_BUILD_I386),1) $(BINARIES_32): CFLAGS += -m32 $(BINARIES_32): LDLIBS += -lrt -ldl -lm -$(BINARIES_32): %_32: %.c +$(BINARIES_32): $(OUTPUT)/%_32: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) endif @@ -109,7 +110,7 @@ endif ifeq ($(CAN_BUILD_X86_64),1) $(BINARIES_64): CFLAGS += -m64 $(BINARIES_64): LDLIBS += -lrt -ldl -$(BINARIES_64): %_64: %.c +$(BINARIES_64): $(OUTPUT)/%_64: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) endif diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index 6c6336dd3b7f..1e662d59c502 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -13,6 +13,7 @@ /* Just the flags we need, copied from mm.h: */ #define FOLL_WRITE 0x01 /* check pte is writable */ +#define FOLL_TOUCH 0x02 /* mark page accessed */ static char *cmd_to_str(unsigned long cmd) { @@ -37,13 +38,13 @@ int main(int argc, char **argv) { struct gup_test gup = { 0 }; unsigned long size = 128 * MB; - int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1; unsigned long cmd = GUP_FAST_BENCHMARK; - int flags = MAP_PRIVATE; + int flags = MAP_PRIVATE, touch = 0; char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwSH")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHpz")) != -1) { switch (opt) { case 'a': cmd = PIN_FAST_BENCHMARK; @@ -65,9 +66,13 @@ int main(int argc, char **argv) */ gup.which_pages[0] = 1; break; + case 'p': + /* works only with DUMP_USER_PAGES_TEST */ + gup.test_flags |= GUP_TEST_FLAG_DUMP_PAGES_USE_PIN; + break; case 'F': /* strtol, so you can pass flags in hex form */ - gup.flags = strtol(optarg, 0, 0); + gup.gup_flags = strtol(optarg, 0, 0); break; case 'm': size = atoi(optarg) * MB; @@ -93,6 +98,9 @@ int main(int argc, char **argv) case 'w': write = 1; break; + case 'W': + write = 0; + break; case 'f': file = optarg; break; @@ -103,6 +111,10 @@ int main(int argc, char **argv) case 'H': flags |= (MAP_HUGETLB | MAP_ANONYMOUS); break; + case 'z': + /* fault pages in gup, do not fault in userland */ + touch = 1; + break; default: return -1; } @@ -140,7 +152,7 @@ int main(int argc, char **argv) gup.nr_pages_per_call = nr_pages; if (write) - gup.flags |= FOLL_WRITE; + gup.gup_flags |= FOLL_WRITE; fd = open("/sys/kernel/debug/gup_test", O_RDWR); if (fd == -1) { @@ -160,8 +172,18 @@ int main(int argc, char **argv) else if (thp == 0) madvise(p, size, MADV_NOHUGEPAGE); - for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) - p[0] = 0; + /* + * FOLL_TOUCH, in gup_test, is used as an either/or case: either + * fault pages in from the kernel via FOLL_TOUCH, or fault them + * in here, from user space. This allows comparison of performance + * between those two cases. + */ + if (touch) { + gup.gup_flags |= FOLL_TOUCH; + } else { + for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) + p[0] = 0; + } /* Only report timing information on the *_BENCHMARK commands: */ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) || diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/vm/mremap_dontunmap.c index 3a7b5ef0b0c6..f01dc4a85b0b 100644 --- a/tools/testing/selftests/vm/mremap_dontunmap.c +++ b/tools/testing/selftests/vm/mremap_dontunmap.c @@ -127,6 +127,57 @@ static void mremap_dontunmap_simple() "unable to unmap source mapping"); } +// This test validates that MREMAP_DONTUNMAP on a shared mapping works as expected. +static void mremap_dontunmap_simple_shmem() +{ + unsigned long num_pages = 5; + + int mem_fd = memfd_create("memfd", MFD_CLOEXEC); + BUG_ON(mem_fd < 0, "memfd_create"); + + BUG_ON(ftruncate(mem_fd, num_pages * page_size) < 0, + "ftruncate"); + + void *source_mapping = + mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE, + MAP_FILE | MAP_SHARED, mem_fd, 0); + BUG_ON(source_mapping == MAP_FAILED, "mmap"); + + BUG_ON(close(mem_fd) < 0, "close"); + + memset(source_mapping, 'a', num_pages * page_size); + + // Try to just move the whole mapping anywhere (not fixed). + void *dest_mapping = + mremap(source_mapping, num_pages * page_size, num_pages * page_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL); + if (dest_mapping == MAP_FAILED && errno == EINVAL) { + // Old kernel which doesn't support MREMAP_DONTUNMAP on shmem. + BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, + "unable to unmap source mapping"); + return; + } + + BUG_ON(dest_mapping == MAP_FAILED, "mremap"); + + // Validate that the pages have been moved, we know they were moved if + // the dest_mapping contains a's. + BUG_ON(check_region_contains_byte + (dest_mapping, num_pages * page_size, 'a') != 0, + "pages did not migrate"); + + // Because the region is backed by shmem, we will actually see the same + // memory at the source location still. + BUG_ON(check_region_contains_byte + (source_mapping, num_pages * page_size, 'a') != 0, + "source should have no ptes"); + + BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1, + "unable to unmap destination mapping"); + BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, + "unable to unmap source mapping"); +} + // This test validates MREMAP_DONTUNMAP will move page tables to a specific // destination using MREMAP_FIXED, also while validating that the source // remains intact. @@ -300,6 +351,7 @@ int main(void) BUG_ON(page_buffer == MAP_FAILED, "unable to mmap a page."); mremap_dontunmap_simple(); + mremap_dontunmap_simple_shmem(); mremap_dontunmap_simple_fixed(); mremap_dontunmap_partial_mapping(); mremap_dontunmap_partial_mapping_overwrite(); diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c new file mode 100644 index 000000000000..1af16d2c2a0a --- /dev/null +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual + * address range in a process via <debugfs>/split_huge_pages interface. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> +#include <inttypes.h> +#include <string.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/mount.h> +#include <malloc.h> +#include <stdbool.h> + +uint64_t pagesize; +unsigned int pageshift; +uint64_t pmd_pagesize; + +#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" +#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" +#define SMAP_PATH "/proc/self/smaps" +#define INPUT_MAX 80 + +#define PID_FMT "%d,0x%lx,0x%lx" +#define PATH_FMT "%s,0x%lx,0x%lx" + +#define PFN_MASK ((1UL<<55)-1) +#define KPF_THP (1UL<<22) + +int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) +{ + uint64_t paddr; + uint64_t page_flags; + + if (pagemap_file) { + pread(pagemap_file, &paddr, sizeof(paddr), + ((long)vaddr >> pageshift) * sizeof(paddr)); + + if (kpageflags_file) { + pread(kpageflags_file, &page_flags, sizeof(page_flags), + (paddr & PFN_MASK) * sizeof(page_flags)); + + return !!(page_flags & KPF_THP); + } + } + return 0; +} + + +static uint64_t read_pmd_pagesize(void) +{ + int fd; + char buf[20]; + ssize_t num_read; + + fd = open(PMD_SIZE_PATH, O_RDONLY); + if (fd == -1) { + perror("Open hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + num_read = read(fd, buf, 19); + if (num_read < 1) { + close(fd); + perror("Read hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + buf[num_read] = '\0'; + close(fd); + + return strtoul(buf, NULL, 10); +} + +static int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) + return 0; + + return (unsigned int) numwritten; +} + +static void write_debugfs(const char *fmt, ...) +{ + char input[INPUT_MAX]; + int ret; + va_list argp; + + va_start(argp, fmt); + ret = vsnprintf(input, INPUT_MAX, fmt, argp); + va_end(argp); + + if (ret >= INPUT_MAX) { + printf("%s: Debugfs input is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) { + perror(SPLIT_DEBUGFS); + exit(EXIT_FAILURE); + } +} + +#define MAX_LINE_LENGTH 500 + +static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +{ + while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + if (!strncmp(buf, pattern, strlen(pattern))) + return true; + } + return false; +} + +static uint64_t check_huge(void *addr) +{ + uint64_t thp = 0; + int ret; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + char addr_pattern[MAX_LINE_LENGTH]; + + ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", + (unsigned long) addr); + if (ret >= MAX_LINE_LENGTH) { + printf("%s: Pattern is too long\n", __func__); + exit(EXIT_FAILURE); + } + + + fp = fopen(SMAP_PATH, "r"); + if (!fp) { + printf("%s: Failed to open file %s\n", __func__, SMAP_PATH); + exit(EXIT_FAILURE); + } + if (!check_for_pattern(fp, addr_pattern, buffer)) + goto err_out; + + /* + * Fetch the AnonHugePages: in the same block and check the number of + * hugepages. + */ + if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + goto err_out; + + if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) { + printf("Reading smap error\n"); + exit(EXIT_FAILURE); + } + +err_out: + fclose(fp); + return thp; +} + +void split_pmd_thp(void) +{ + char *one_page; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + + one_page = memalign(pmd_pagesize, len); + + if (!one_page) { + printf("Fail to allocate memory\n"); + exit(EXIT_FAILURE); + } + + madvise(one_page, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + /* split all THPs */ + write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, + (uint64_t)one_page + len); + + for (i = 0; i < len; i++) + if (one_page[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + + + thp_size = check_huge(one_page); + if (thp_size) { + printf("Still %ld kB AnonHugePages not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split huge pages successful\n"); + free(one_page); +} + +void split_pte_mapped_thp(void) +{ + char *one_page, *pte_mapped, *pte_mapped2; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + const char *pagemap_template = "/proc/%d/pagemap"; + const char *kpageflags_proc = "/proc/kpageflags"; + char pagemap_proc[255]; + int pagemap_fd; + int kpageflags_fd; + + if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) { + perror("get pagemap proc error"); + exit(EXIT_FAILURE); + } + pagemap_fd = open(pagemap_proc, O_RDONLY); + + if (pagemap_fd == -1) { + perror("read pagemap:"); + exit(EXIT_FAILURE); + } + + kpageflags_fd = open(kpageflags_proc, O_RDONLY); + + if (kpageflags_fd == -1) { + perror("read kpageflags:"); + exit(EXIT_FAILURE); + } + + one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + madvise(one_page, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + /* remap the first pagesize of first THP */ + pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); + + /* remap the Nth pagesize of Nth THP */ + for (i = 1; i < 4; i++) { + pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, + pagesize, pagesize, + MREMAP_MAYMOVE|MREMAP_FIXED, + pte_mapped + pagesize * i); + if (pte_mapped2 == (char *)-1) { + perror("mremap failed"); + exit(EXIT_FAILURE); + } + } + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + + if (thp_size != 4) { + printf("Some THPs are missing during mremap\n"); + exit(EXIT_FAILURE); + } + + /* split all remapped THPs */ + write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, + (uint64_t)pte_mapped + pagesize * 4); + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) { + if (pte_mapped[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + } + + if (thp_size) { + printf("Still %ld THPs not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split PTE-mapped huge pages successful\n"); + munmap(one_page, len); + close(pagemap_fd); + close(kpageflags_fd); +} + +void split_file_backed_thp(void) +{ + int status; + int fd; + ssize_t num_written; + char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; + const char *tmpfs_loc = mkdtemp(tmpfs_template); + char testfile[INPUT_MAX]; + uint64_t pgoff_start = 0, pgoff_end = 1024; + + printf("Please enable pr_debug in split_huge_pages_in_file() if you need more info.\n"); + + status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); + + if (status) { + printf("Unable to create a tmpfs for testing\n"); + exit(EXIT_FAILURE); + } + + status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); + if (status >= INPUT_MAX) { + printf("Fail to create file-backed THP split testing file\n"); + goto cleanup; + } + + fd = open(testfile, O_CREAT|O_WRONLY); + if (fd == -1) { + perror("Cannot open testing file\n"); + goto cleanup; + } + + /* write something to the file, so a file-backed THP can be allocated */ + num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); + close(fd); + + if (num_written < 1) { + printf("Fail to write data to testing file\n"); + goto cleanup; + } + + /* split the file-backed THP */ + write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end); + + status = unlink(testfile); + if (status) + perror("Cannot remove testing file\n"); + +cleanup: + status = umount(tmpfs_loc); + if (status) { + printf("Unable to umount %s\n", tmpfs_loc); + exit(EXIT_FAILURE); + } + status = rmdir(tmpfs_loc); + if (status) { + perror("cannot remove tmp dir"); + exit(EXIT_FAILURE); + } + + printf("file-backed THP split test done, please check dmesg for more information\n"); +} + +int main(int argc, char **argv) +{ + if (geteuid() != 0) { + printf("Please run the benchmark as root\n"); + exit(EXIT_FAILURE); + } + + pagesize = getpagesize(); + pageshift = ffs(pagesize) - 1; + pmd_pagesize = read_pmd_pagesize(); + + split_pmd_thp(); + split_pte_mapped_thp(); + split_file_backed_thp(); + + return 0; +} diff --git a/tools/testing/selftests/vm/test_vmalloc.sh b/tools/testing/selftests/vm/test_vmalloc.sh index 06d2bb109f06..d73b846736f1 100755 --- a/tools/testing/selftests/vm/test_vmalloc.sh +++ b/tools/testing/selftests/vm/test_vmalloc.sh @@ -11,6 +11,7 @@ TEST_NAME="vmalloc" DRIVER="test_${TEST_NAME}" +NUM_CPUS=`grep -c ^processor /proc/cpuinfo` # 1 if fails exitcode=1 @@ -22,9 +23,9 @@ ksft_skip=4 # Static templates for performance, stressing and smoke tests. # Also it is possible to pass any supported parameters manualy. # -PERF_PARAM="single_cpu_test=1 sequential_test_order=1 test_repeat_count=3" -SMOKE_PARAM="single_cpu_test=1 test_loop_count=10000 test_repeat_count=10" -STRESS_PARAM="test_repeat_count=20" +PERF_PARAM="sequential_test_order=1 test_repeat_count=3" +SMOKE_PARAM="test_loop_count=10000 test_repeat_count=10" +STRESS_PARAM="nr_threads=$NUM_CPUS test_repeat_count=20" check_test_requirements() { @@ -58,8 +59,8 @@ run_perfformance_check() run_stability_check() { - echo "Run stability tests. In order to stress vmalloc subsystem we run" - echo "all available test cases on all available CPUs simultaneously." + echo "Run stability tests. In order to stress vmalloc subsystem all" + echo "available test cases are run by NUM_CPUS workers simultaneously." echo "It will take time, so be patient." modprobe $DRIVER $STRESS_PARAM > /dev/null 2>&1 @@ -92,17 +93,17 @@ usage() echo "# Shows help message" echo "./${DRIVER}.sh" echo - echo "# Runs 1 test(id_1), repeats it 5 times on all online CPUs" - echo "./${DRIVER}.sh run_test_mask=1 test_repeat_count=5" + echo "# Runs 1 test(id_1), repeats it 5 times by NUM_CPUS workers" + echo "./${DRIVER}.sh nr_threads=$NUM_CPUS run_test_mask=1 test_repeat_count=5" echo echo -n "# Runs 4 tests(id_1|id_2|id_4|id_16) on one CPU with " echo "sequential order" - echo -n "./${DRIVER}.sh single_cpu_test=1 sequential_test_order=1 " + echo -n "./${DRIVER}.sh sequential_test_order=1 " echo "run_test_mask=23" echo - echo -n "# Runs all tests on all online CPUs, shuffled order, repeats " + echo -n "# Runs all tests by NUM_CPUS workers, shuffled order, repeats " echo "20 times" - echo "./${DRIVER}.sh test_repeat_count=20" + echo "./${DRIVER}.sh nr_threads=$NUM_CPUS test_repeat_count=20" echo echo "# Performance analysis" echo "./${DRIVER}.sh performance" diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 92b8ec423201..f5ab5e0312e7 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -81,6 +81,8 @@ static volatile bool test_uffdio_copy_eexist = true; static volatile bool test_uffdio_zeropage_eexist = true; /* Whether to test uffd write-protection */ static bool test_uffdio_wp = false; +/* Whether to test uffd minor faults */ +static bool test_uffdio_minor = false; static bool map_shared; static int huge_fd; @@ -96,6 +98,7 @@ struct uffd_stats { int cpu; unsigned long missing_faults; unsigned long wp_faults; + unsigned long minor_faults; }; /* pthread_mutex_t starts at page offset 0 */ @@ -153,17 +156,19 @@ static void uffd_stats_reset(struct uffd_stats *uffd_stats, uffd_stats[i].cpu = i; uffd_stats[i].missing_faults = 0; uffd_stats[i].wp_faults = 0; + uffd_stats[i].minor_faults = 0; } } static void uffd_stats_report(struct uffd_stats *stats, int n_cpus) { int i; - unsigned long long miss_total = 0, wp_total = 0; + unsigned long long miss_total = 0, wp_total = 0, minor_total = 0; for (i = 0; i < n_cpus; i++) { miss_total += stats[i].missing_faults; wp_total += stats[i].wp_faults; + minor_total += stats[i].minor_faults; } printf("userfaults: %llu missing (", miss_total); @@ -172,6 +177,9 @@ static void uffd_stats_report(struct uffd_stats *stats, int n_cpus) printf("\b), %llu wp (", wp_total); for (i = 0; i < n_cpus; i++) printf("%lu+", stats[i].wp_faults); + printf("\b), %llu minor (", minor_total); + for (i = 0; i < n_cpus; i++) + printf("%lu+", stats[i].minor_faults); printf("\b)\n"); } @@ -328,7 +336,7 @@ static struct uffd_test_ops shmem_uffd_test_ops = { }; static struct uffd_test_ops hugetlb_uffd_test_ops = { - .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, + .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC & ~(1 << _UFFDIO_CONTINUE), .allocate_area = hugetlb_allocate_area, .release_pages = hugetlb_release_pages, .alias_mapping = hugetlb_alias_mapping, @@ -362,6 +370,22 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp) } } +static void continue_range(int ufd, __u64 start, __u64 len) +{ + struct uffdio_continue req; + + req.range.start = start; + req.range.len = len; + req.mode = 0; + + if (ioctl(ufd, UFFDIO_CONTINUE, &req)) { + fprintf(stderr, + "UFFDIO_CONTINUE failed for address 0x%" PRIx64 "\n", + (uint64_t)start); + exit(1); + } +} + static void *locking_thread(void *arg) { unsigned long cpu = (unsigned long) arg; @@ -569,8 +593,32 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, } if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { + /* Write protect page faults */ wp_range(uffd, msg->arg.pagefault.address, page_size, false); stats->wp_faults++; + } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { + uint8_t *area; + int b; + + /* + * Minor page faults + * + * To prove we can modify the original range for testing + * purposes, we're going to bit flip this range before + * continuing. + * + * Note that this requires all minor page fault tests operate on + * area_dst (non-UFFD-registered) and area_dst_alias + * (UFFD-registered). + */ + + area = (uint8_t *)(area_dst + + ((char *)msg->arg.pagefault.address - + area_dst_alias)); + for (b = 0; b < page_size; ++b) + area[b] = ~area[b]; + continue_range(uffd, msg->arg.pagefault.address, page_size); + stats->minor_faults++; } else { /* Missing page faults */ if (bounces & BOUNCE_VERIFY && @@ -779,7 +827,7 @@ static int stress(struct uffd_stats *uffd_stats) return 0; } -static int userfaultfd_open(int features) +static int userfaultfd_open_ext(uint64_t *features) { struct uffdio_api uffdio_api; @@ -792,7 +840,7 @@ static int userfaultfd_open(int features) uffd_flags = fcntl(uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; - uffdio_api.features = features; + uffdio_api.features = *features; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { fprintf(stderr, "UFFDIO_API failed.\nPlease make sure to " "run with either root or ptrace capability.\n"); @@ -804,9 +852,15 @@ static int userfaultfd_open(int features) return 1; } + *features = uffdio_api.features; return 0; } +static int userfaultfd_open(uint64_t features) +{ + return userfaultfd_open_ext(&features); +} + sigjmp_buf jbuf, *sigbuf; static void sighndl(int sig, siginfo_t *siginfo, void *ptr) @@ -1112,7 +1166,7 @@ static int userfaultfd_events_test(void) } if (!pid) - return faulting_process(0); + exit(faulting_process(0)); waitpid(pid, &err, 0); if (err) { @@ -1215,6 +1269,102 @@ static int userfaultfd_sig_test(void) return userfaults != 0; } +static int userfaultfd_minor_test(void) +{ + struct uffdio_register uffdio_register; + unsigned long expected_ioctls; + unsigned long p; + pthread_t uffd_mon; + uint8_t expected_byte; + void *expected_page; + char c; + struct uffd_stats stats = { 0 }; + uint64_t features = UFFD_FEATURE_MINOR_HUGETLBFS; + + if (!test_uffdio_minor) + return 0; + + printf("testing minor faults: "); + fflush(stdout); + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + if (userfaultfd_open_ext(&features)) + return 1; + /* If kernel reports the feature isn't supported, skip the test. */ + if (!(features & UFFD_FEATURE_MINOR_HUGETLBFS)) { + printf("skipping test due to lack of feature support\n"); + fflush(stdout); + return 0; + } + + uffdio_register.range.start = (unsigned long)area_dst_alias; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } + + expected_ioctls = uffd_test_ops->expected_ioctls; + expected_ioctls |= 1 << _UFFDIO_CONTINUE; + if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { + fprintf(stderr, "unexpected missing ioctl(s)\n"); + exit(1); + } + + /* + * After registering with UFFD, populate the non-UFFD-registered side of + * the shared mapping. This should *not* trigger any UFFD minor faults. + */ + for (p = 0; p < nr_pages; ++p) { + memset(area_dst + (p * page_size), p % ((uint8_t)-1), + page_size); + } + + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) { + perror("uffd_poll_thread create"); + exit(1); + } + + /* + * Read each of the pages back using the UFFD-registered mapping. We + * expect that the first time we touch a page, it will result in a minor + * fault. uffd_poll_thread will resolve the fault by bit-flipping the + * page's contents, and then issuing a CONTINUE ioctl. + */ + + if (posix_memalign(&expected_page, page_size, page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + + for (p = 0; p < nr_pages; ++p) { + expected_byte = ~((uint8_t)(p % ((uint8_t)-1))); + memset(expected_page, expected_byte, page_size); + if (my_bcmp(expected_page, area_dst_alias + (p * page_size), + page_size)) { + fprintf(stderr, + "unexpected page contents after minor fault\n"); + exit(1); + } + } + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) { + perror("pipe write"); + exit(1); + } + if (pthread_join(uffd_mon, NULL)) + return 1; + + close(uffd); + + uffd_stats_report(&stats, 1); + + return stats.missing_faults != 0 || stats.minor_faults != nr_pages; +} + static int userfaultfd_stress(void) { void *area; @@ -1413,7 +1563,7 @@ static int userfaultfd_stress(void) close(uffd); return userfaultfd_zeropage_test() || userfaultfd_sig_test() - || userfaultfd_events_test(); + || userfaultfd_events_test() || userfaultfd_minor_test(); } /* @@ -1454,6 +1604,8 @@ static void set_test_type(const char *type) map_shared = true; test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; + /* Minor faults require shared hugetlb; only enable here. */ + test_uffdio_minor = true; } else if (!strcmp(type, "shmem")) { map_shared = true; test_type = TEST_SHMEM; diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S index a71d92da8f46..f3f56e681e9f 100644 --- a/tools/testing/selftests/x86/thunks_32.S +++ b/tools/testing/selftests/x86/thunks_32.S @@ -45,3 +45,5 @@ call64_from_32: ret .size call64_from_32, .-call64_from_32 + +.section .note.GNU-stack,"",%progbits |