diff options
37 files changed, 1231 insertions, 156 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8194696e2805..8c9573660d51 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -287,7 +287,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ offsetof(struct bpf_array, map.max_entries)); EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 47 /* number of bytes to jump */ +#define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -296,21 +296,20 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 36 +#define OFFSET2 32 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */ /* prog = array->ptrs[index]; */ - EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ offsetof(struct bpf_array, ptrs)); - EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) * goto out; */ - EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ #define OFFSET3 10 EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0039b4725405..a31912415264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -620,6 +620,12 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; +enum mlx5e_tunnel_types { + MLX5E_TT_IPV4_GRE, + MLX5E_TT_IPV6_GRE, + MLX5E_NUM_TUNNEL_TT, +}; + enum { MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, @@ -679,6 +685,7 @@ struct mlx5e_l2_table { struct mlx5e_ttc_table { struct mlx5e_flow_table ft; struct mlx5_flow_handle *rules[MLX5E_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; }; #define ARFS_HASH_SHIFT BITS_PER_BYTE @@ -711,6 +718,7 @@ enum { MLX5E_VLAN_FT_LEVEL = 0, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, + MLX5E_INNER_TTC_FT_LEVEL, MLX5E_ARFS_FT_LEVEL }; @@ -736,6 +744,7 @@ struct mlx5e_flow_steering { struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; struct mlx5e_ttc_table ttc; + struct mlx5e_ttc_table inner_ttc; struct mlx5e_arfs_tables arfs; }; @@ -769,6 +778,7 @@ struct mlx5e_priv { u32 tisn[MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; int hard_mtu; @@ -903,7 +913,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, struct mlx5e_redirect_rqt_param rrp); void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc); + void *tirc, bool inner); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -932,6 +942,12 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type); +static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); +} + static inline struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 0dd7e9caf150..c6ec90e9c95b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1212,9 +1212,18 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); + } } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index eecbc6d4f51f..f11fd07ac4dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -608,12 +608,21 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) ttc->rules[i] = NULL; } } + + for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } } -static struct { +struct mlx5e_etype_proto { u16 etype; u8 proto; -} ttc_rules[] = { +}; + +static struct mlx5e_etype_proto ttc_rules[] = { [MLX5E_TT_IPV4_TCP] = { .etype = ETH_P_IP, .proto = IPPROTO_TCP, @@ -660,6 +669,28 @@ static struct { }, }; +static struct mlx5e_etype_proto ttc_tunnel_rules[] = { + [MLX5E_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5E_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, +}; + +static u8 mlx5e_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + static struct mlx5_flow_handle * mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, struct mlx5_flow_table *ft, @@ -667,10 +698,12 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { + int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; + u8 ipv; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) @@ -681,7 +714,13 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); } - if (etype) { + + ipv = mlx5e_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); @@ -723,6 +762,20 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) goto del_rules; } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + rules = ttc->tunnel_rules; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.inner_ttc.ft.t; + for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + return 0; del_rules: @@ -733,13 +786,23 @@ del_rules: } #define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) #define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ MLX5E_TTC_GROUP2_SIZE +\ MLX5E_TTC_GROUP3_SIZE) -static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) + +#define MLX5E_INNER_TTC_NUM_GROUPS 3 +#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ + MLX5E_INNER_TTC_GROUP2_SIZE +\ + MLX5E_INNER_TTC_GROUP3_SIZE) + +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, + bool use_ipv) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5e_flow_table *ft = &ttc->ft; @@ -761,7 +824,10 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) /* L4 Group */ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_TTC_GROUP1_SIZE; @@ -802,6 +868,190 @@ err: return err; } +static struct mlx5_flow_handle * +mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5e_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_handle **rules; + struct mlx5e_ttc_table *ttc; + struct mlx5_flow_table *ft; + int err; + int tt; + + ttc = &priv->fs.inner_ttc; + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = priv->direct_tir[0].tirn; + else + dest.tir_num = priv->inner_indir_tir[tt].tirn; + + rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + ft_attr.max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + ft_attr.level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_inner_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_inner_ttc_table_rules(priv); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; @@ -812,6 +1062,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) int mlx5e_create_ttc_table(struct mlx5e_priv *priv) { + bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); struct mlx5e_ttc_table *ttc = &priv->fs.ttc; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; @@ -828,7 +1079,7 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_ttc_table_groups(ttc); + err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); if (err) goto err; @@ -1154,11 +1405,18 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } + err = mlx5e_create_inner_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_arfs_tables; + goto err_destroy_inner_ttc_table; } err = mlx5e_create_l2_table(priv); @@ -1183,6 +1441,8 @@ err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv); +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1194,6 +1454,7 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fdc2b92f020b..111c7523d448 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2349,9 +2349,10 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc) + void *tirc, bool inner) { - void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : + MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -2500,6 +2501,21 @@ free_in: return err; } +static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); + + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); +} + static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; @@ -2865,7 +2881,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) @@ -2884,6 +2900,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) struct mlx5e_tir *tir; void *tirc; int inlen; + int i = 0; int err; u32 *in; int tt; @@ -2899,16 +2916,36 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_indir_tir_ctx(priv, tt, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) - goto err_destroy_tirs; + if (err) { + mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + goto out; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { + memset(in, 0, inlen); + tir = &priv->inner_indir_tir[i]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_inner_indir_tir_ctx(priv, i, tirc); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) { + mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } + } + +out: kvfree(in); return 0; -err_destroy_tirs: - mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); +err_destroy_inner_tirs: + for (i--; i >= 0; i--) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); + for (tt--; tt >= 0; tt--) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); @@ -2962,6 +2999,12 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) @@ -3499,13 +3542,13 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0); } -static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, - struct sk_buff *skb, - netdev_features_t features) +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) { struct udphdr *udph; - u16 proto; - u16 port = 0; + u8 proto; + u16 port; switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): @@ -3518,14 +3561,17 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, goto out; } - if (proto == IPPROTO_UDP) { + switch (proto) { + case IPPROTO_GRE: + return features; + case IPPROTO_UDP: udph = udp_hdr(skb); port = be16_to_cpu(udph->dest); - } - /* Verify if UDP port is being offloaded by HW */ - if (port && mlx5e_vxlan_lookup_port(priv, port)) - return features; + /* Verify if UDP port is being offloaded by HW */ + if (mlx5e_vxlan_lookup_port(priv, port)) + return features; + } out: /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ @@ -3549,7 +3595,7 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) - return mlx5e_vxlan_features_check(priv, skb, features); + return mlx5e_tunnel_features_check(priv, skb, features); return features; } @@ -4014,20 +4060,32 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; - if (mlx5e_vxlan_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_PARTIAL; netdev->hw_enc_features |= NETIF_F_IP_CSUM; netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; + } + + if (mlx5e_vxlan_allowed(mdev)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); if (fcs_supported) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d731d57a996a..5a7bea688ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -83,8 +83,8 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Vlan, mac, ttc, aRFS */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 4 +/* Vlan, mac, ttc, inner ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 5 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 3ea13148ed05..51e6846da72b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -76,6 +76,7 @@ static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = { &mlxsw_sp_dpipe_header_metadata, &devlink_dpipe_header_ethernet, &devlink_dpipe_header_ipv4, + &devlink_dpipe_header_ipv6, }; static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = { @@ -328,9 +329,21 @@ static int mlxsw_sp_dpipe_table_host_matches_dump(struct sk_buff *skb, int type) if (err) return err; - match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; - match.header = &devlink_dpipe_header_ipv4; - match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + switch (type) { + case AF_INET: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv4; + match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv6; + match.field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return -EINVAL; + } return devlink_dpipe_match_put(skb, &match); } @@ -342,7 +355,7 @@ mlxsw_sp_dpipe_table_host4_matches_dump(void *priv, struct sk_buff *skb) } static int -mlxsw_sp_dpipe_table_host4_actions_dump(void *priv, struct sk_buff *skb) +mlxsw_sp_dpipe_table_host_actions_dump(void *priv, struct sk_buff *skb) { struct devlink_dpipe_action action = {0}; @@ -373,8 +386,19 @@ mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *match match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; - match->header = &devlink_dpipe_header_ipv4; - match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + switch (type) { + case AF_INET: + match->header = &devlink_dpipe_header_ipv4; + match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match->header = &devlink_dpipe_header_ipv6; + match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return; + } action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; action->header = &devlink_dpipe_header_ethernet; @@ -411,7 +435,18 @@ mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry, match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; match_value->match = match; - match_value->value_size = sizeof(u32); + switch (type) { + case AF_INET: + match_value->value_size = sizeof(u32); + break; + case AF_INET6: + match_value->value_size = sizeof(struct in6_addr); + break; + default: + WARN_ON(1); + return -EINVAL; + } + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); if (!match_value->value) return -ENOMEM; @@ -426,13 +461,12 @@ mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry, } static void -__mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, - struct mlxsw_sp_rif *rif, - unsigned char *ha, u32 dip) +__mlxsw_sp_dpipe_table_host_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + unsigned char *ha, void *dip) { struct devlink_dpipe_value *value; u32 *rif_value; - u32 *dip_value; u8 *ha_value; /* Set Match RIF index */ @@ -445,9 +479,7 @@ __mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, /* Set Match DIP */ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; - - dip_value = value->value; - *dip_value = dip; + memcpy(value->value, dip, value->value_size); /* Set Action DMAC */ value = entry->action_values; @@ -465,7 +497,21 @@ mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, ha = mlxsw_sp_neigh_entry_ha(neigh_entry); dip = mlxsw_sp_neigh4_entry_dip(neigh_entry); - __mlxsw_sp_dpipe_table_host4_entry_fill(entry, rif, ha, dip); + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, &dip); +} + +static void +mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + struct in6_addr *dip; + unsigned char *ha; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh6_entry_dip(neigh_entry); + + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip); } static void @@ -477,7 +523,18 @@ mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp, { int err; - mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif); + switch (type) { + case AF_INET: + mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif); + break; + case AF_INET6: + mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif); + break; + default: + WARN_ON(1); + return; + } + err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry, &entry->counter); if (!err) @@ -516,7 +573,13 @@ start_again: rif_neigh_count = 0; mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { - if (mlxsw_sp_neigh_entry_type(neigh_entry) != type) + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) continue; if (rif_neigh_count < rif_neigh_skip) @@ -616,8 +679,15 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, if (!rif) continue; mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { - if (mlxsw_sp_neigh_entry_type(neigh_entry) != type) + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + mlxsw_sp_neigh_entry_counter_update(mlxsw_sp, neigh_entry, enable); @@ -648,8 +718,15 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) if (!rif) continue; mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { - if (mlxsw_sp_neigh_entry_type(neigh_entry) != type) + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + size++; } } @@ -667,7 +744,7 @@ static u64 mlxsw_sp_dpipe_table_host4_size_get(void *priv) static struct devlink_dpipe_table_ops mlxsw_sp_host4_ops = { .matches_dump = mlxsw_sp_dpipe_table_host4_matches_dump, - .actions_dump = mlxsw_sp_dpipe_table_host4_actions_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, .entries_dump = mlxsw_sp_dpipe_table_host4_entries_dump, .counters_set_update = mlxsw_sp_dpipe_table_host4_counters_update, .size_get = mlxsw_sp_dpipe_table_host4_size_get, @@ -691,6 +768,64 @@ static void mlxsw_sp_dpipe_host4_table_fini(struct mlxsw_sp *mlxsw_sp) MLXSW_SP_DPIPE_TABLE_NAME_HOST4); } +static int +mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6); +} + +static int +mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET6); +} + +static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6); + return 0; +} + +static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET6); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update, + .size_get = mlxsw_sp_dpipe_table_host6_size_get, +}; + +static int mlxsw_sp_dpipe_host6_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6, + &mlxsw_sp_host6_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -707,8 +842,14 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_dpipe_host4_table_init(mlxsw_sp); if (err) goto err_host4_table_init; + + err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); + if (err) + goto err_host6_table_init; return 0; +err_host6_table_init: + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); err_host4_table_init: mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); err_erif_table_init: @@ -720,6 +861,7 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); devlink_dpipe_headers_unregister(devlink); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index c56a3d92a2bb..283fde4e6783 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -55,5 +55,6 @@ static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" #endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 377d85cff7fa..0cf68102d113 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -947,6 +947,15 @@ u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) return ntohl(*((__be32 *) n->primary_key)); } +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return (struct in6_addr *) &n->primary_key; +} + int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, u64 *p_counter) @@ -999,21 +1008,33 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, } static bool -mlxsw_sp_neigh4_counter_should_alloc(struct mlxsw_sp *mlxsw_sp) +mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) { struct devlink *devlink; + const char *table_name; + + switch (mlxsw_sp_neigh_entry_type(neigh_entry)) { + case AF_INET: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4; + break; + case AF_INET6: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6; + break; + default: + WARN_ON(1); + return false; + } devlink = priv_to_devlink(mlxsw_sp->core); - return devlink_dpipe_table_counter_enabled(devlink, - MLXSW_SP_DPIPE_TABLE_NAME_HOST4); + return devlink_dpipe_table_counter_enabled(devlink, table_name); } static void mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry) { - if (mlxsw_sp_neigh_entry_type(neigh_entry) != AF_INET || - !mlxsw_sp_neigh4_counter_should_alloc(mlxsw_sp)) + if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry)) return; if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index)) @@ -1396,8 +1417,10 @@ mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } -static bool mlxsw_sp_neigh_ipv6_ignore(struct neighbour *n) +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry) { + struct neighbour *n = neigh_entry->key.n; + /* Packets with a link-local destination address are trapped * after LPM lookup and never reach the neighbour table, so * there is no need to program such neighbours to the device. @@ -1420,7 +1443,7 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); } else if (neigh_entry->key.n->tbl->family == AF_INET6) { - if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry->key.n)) + if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) return; mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index fb0f971c670c..87a04afee138 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -65,6 +65,8 @@ int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry); unsigned char * mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry); u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); #define mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) \ for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry; \ @@ -76,5 +78,6 @@ void mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding); +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); #endif /* _MLXSW_ROUTER_H_*/ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ae7d09b9c52f..3d5d32e5446c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -602,7 +602,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 reserved_at_1a[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; - u8 tunnel_statless_gre[0x1]; + u8 tunnel_stateless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; u8 swp[0x1]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 29bf06ff157c..35de8312e0b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1124,8 +1124,8 @@ struct xfrmdev_ops { * This function is used to submit a XDP packet for transmit on a * netdevice. * void (*ndo_xdp_flush)(struct net_device *dev); - * This function is used to inform the driver to flush a paticular - * xpd tx queue. Must be called on same CPU as xdp_xmit. + * This function is used to inform the driver to flush a particular + * xdp tx queue. Must be called on same CPU as xdp_xmit. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); diff --git a/include/net/devlink.h b/include/net/devlink.h index aaf7178127a2..b9654e133599 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -330,6 +330,7 @@ int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; +extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; #else diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c30b634c5f82..d6247a3c40df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -217,6 +217,7 @@ struct tcf_proto_ops { void **, bool); int (*delete)(struct tcf_proto*, void *, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + void (*bind_class)(void *, u32, unsigned long); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h index 0f1cde00af11..1bee3e7fdf32 100644 --- a/include/trace/events/bridge.h +++ b/include/trace/events/bridge.h @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete, __entry->addr[4], __entry->addr[5], __entry->vid) ); +TRACE_EVENT(br_fdb_update, + + TP_PROTO(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid, bool added_by_user), + + TP_ARGS(br, source, addr, vid, added_by_user), + + TP_STRUCT__entry( + __string(br_dev, br->dev->name) + __string(dev, source->dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(bool, added_by_user) + ), + + TP_fast_assign( + __assign_str(br_dev, br->dev->name); + __assign_str(dev, source->dev->name); + memcpy(__entry->addr, addr, ETH_ALEN); + __entry->vid = vid; + __entry->added_by_user = added_by_user; + ), + + TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d", + __get_str(br_dev), __get_str(dev), __entry->addr[0], + __entry->addr[1], __entry->addr[2], __entry->addr[3], + __entry->addr[4], __entry->addr[5], __entry->vid, + __entry->added_by_user) +); + + #endif /* _TRACE_BRIDGE_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 08c206a863e1..ba848b761cfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -758,6 +758,8 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6c172548589d..0cbca96c66b9 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -234,9 +234,14 @@ enum devlink_dpipe_field_ipv4_id { DEVLINK_DPIPE_FIELD_IPV4_DST_IP, }; +enum devlink_dpipe_field_ipv6_id { + DEVLINK_DPIPE_FIELD_IPV6_DST_IP, +}; + enum devlink_dpipe_header_id { DEVLINK_DPIPE_HEADER_ETHERNET, DEVLINK_DPIPE_HEADER_IPV4, + DEVLINK_DPIPE_HEADER_IPV6, }; #endif /* _UAPI_LINUX_DEVLINK_H_ */ diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index be5e1da6d824..4ea5c8bbe286 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -583,8 +583,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->updated = now; if (unlikely(added_by_user)) fdb->added_by_user = 1; - if (unlikely(fdb_modified)) + if (unlikely(fdb_modified)) { + trace_br_fdb_update(br, source, addr, vid, added_by_user); fdb_notify(br, fdb, RTM_NEWNEIGH); + } } } else { spin_lock(&br->hash_lock); @@ -593,6 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, if (fdb) { if (unlikely(added_by_user)) fdb->added_by_user = 1; + trace_br_fdb_update(br, source, addr, vid, added_by_user); fdb_notify(br, fdb, RTM_NEWNEIGH); } } diff --git a/net/core/devlink.c b/net/core/devlink.c index cbc4b0461b0f..7d430c1d9c3e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -63,6 +63,23 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv4 = { }; EXPORT_SYMBOL(devlink_dpipe_header_ipv4); +static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = { + { + .name = "destination ip", + .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP, + .bitwidth = 128, + }, +}; + +struct devlink_dpipe_header devlink_dpipe_header_ipv6 = { + .name = "ipv6", + .id = DEVLINK_DPIPE_HEADER_IPV6, + .fields = devlink_dpipe_fields_ipv6, + .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6), + .global = true, +}; +EXPORT_SYMBOL(devlink_dpipe_header_ipv6); + EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); static LIST_HEAD(devlink_list); diff --git a/net/core/filter.c b/net/core/filter.c index c6a37fe0285b..9dad3e7e2e10 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3150,6 +3150,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * +sock_filter_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + /* inet and inet6 sockets are created in a process + * context so there is always a valid uid/gid + */ + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + default: + return bpf_base_func_proto(func_id); + } +} + +static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -3455,6 +3469,10 @@ static bool sock_filter_is_valid_access(int off, int size, switch (off) { case offsetof(struct bpf_sock, bound_dev_if): break; + case offsetof(struct bpf_sock, mark): + break; + case offsetof(struct bpf_sock, priority): + break; default: return false; } @@ -3958,6 +3976,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, offsetof(struct sock, sk_bound_dev_if)); break; + case offsetof(struct bpf_sock, mark): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_mark)); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_mark)); + break; + + case offsetof(struct bpf_sock, priority): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_priority)); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_priority)); + break; + case offsetof(struct bpf_sock, family): BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); @@ -4207,7 +4247,7 @@ const struct bpf_verifier_ops lwt_xmit_prog_ops = { }; const struct bpf_verifier_ops cg_sock_prog_ops = { - .get_func_proto = bpf_base_func_proto, + .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 4a0292c97070..1132820c8e62 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -42,6 +42,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add); EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); +EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); #endif EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 47a7b59b355e..5df7857fc0f3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -16,6 +16,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/xfrm.h> +#include <linux/string.h> #include <net/addrconf.h> #include <net/inet_common.h> @@ -30,6 +31,7 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/secure_seq.h> +#include <net/sock.h> #include "dccp.h" #include "ipv6.h" @@ -597,19 +599,13 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - /* - * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below - * (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example. - */ opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) goto reset; - if (opt_skb) { - /* XXX This is where we would goto ipv6_pktoptions. */ - __kfree_skb(opt_skb); - } + if (opt_skb) + goto ipv6_pktoptions; return 0; } @@ -640,10 +636,8 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) goto reset; - if (opt_skb) { - /* XXX This is where we would goto ipv6_pktoptions. */ - __kfree_skb(opt_skb); - } + if (opt_skb) + goto ipv6_pktoptions; return 0; reset: @@ -653,6 +647,35 @@ discard: __kfree_skb(opt_skb); kfree_skb(skb); return 0; + +/* Handling IPV6_PKTOPTIONS skb the similar + * way it's done for net/ipv6/tcp_ipv6.c + */ +ipv6_pktoptions: + if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) { + if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) + np->mcast_oif = inet6_iif(opt_skb); + if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) + np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) + np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); + if (np->repflow) + np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); + if (ipv6_opt_accepted(sk, opt_skb, + &DCCP_SKB_CB(opt_skb)->header.h6)) { + skb_set_owner_r(opt_skb, sk); + memmove(IP6CB(opt_skb), + &DCCP_SKB_CB(opt_skb)->header.h6, + sizeof(struct inet6_skb_parm)); + opt_skb = xchg(&np->pktoptions, opt_skb); + } else { + __kfree_skb(opt_skb); + opt_skb = xchg(&np->pktoptions, NULL); + } + } + + kfree_skb(opt_skb); + return 0; } static int dccp_v6_rcv(struct sk_buff *skb) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 73cc7f167a38..d89ebafd2239 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -235,6 +235,14 @@ skip: } } +static void basic_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct basic_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { @@ -280,6 +288,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = { .delete = basic_delete, .walk = basic_walk, .dump = basic_dump, + .bind_class = basic_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6f2dffe30f25..520c5027646a 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -607,6 +607,14 @@ nla_put_failure: return -1; } +static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_bpf_prog *prog = fh; + + if (prog && prog->res.classid == classid) + prog->res.class = cl; +} + static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -635,6 +643,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = { .delete = cls_bpf_delete, .walk = cls_bpf_walk, .dump = cls_bpf_dump, + .bind_class = cls_bpf_bind_class, }; static int __init cls_bpf_init_mod(void) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 3d041d279b92..1a267e77c6de 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1351,6 +1351,14 @@ nla_put_failure: return -1; } +static void fl_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_fl_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_fl_ops __read_mostly = { .kind = "flower", .classify = fl_classify, @@ -1361,6 +1369,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .dump = fl_dump, + .bind_class = fl_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 192255ec50bd..941245ad07fd 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -412,6 +412,14 @@ nla_put_failure: return -1; } +static void fw_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct fw_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_fw_ops __read_mostly = { .kind = "fw", .classify = fw_classify, @@ -422,6 +430,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = { .delete = fw_delete, .walk = fw_walk, .dump = fw_dump, + .bind_class = fw_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d4dc387f7a56..21cc45caf842 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -251,6 +251,14 @@ nla_put_failure: return -1; } +static void mall_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_mall_head *head = fh; + + if (head && head->res.classid == classid) + head->res.class = cl; +} + static struct tcf_proto_ops cls_mall_ops __read_mostly = { .kind = "matchall", .classify = mall_classify, @@ -261,6 +269,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = { .delete = mall_delete, .walk = mall_walk, .dump = mall_dump, + .bind_class = mall_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 3b70982394ce..9ddde65915d2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -624,6 +624,14 @@ nla_put_failure: return -1; } +static void route4_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct route4_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_route4_ops __read_mostly = { .kind = "route", .classify = route4_classify, @@ -634,6 +642,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = { .delete = route4_delete, .walk = route4_walk, .dump = route4_dump, + .bind_class = route4_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 26203ff817f3..98c05db85bcb 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -723,6 +723,14 @@ nla_put_failure: return -1; } +static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct rsvp_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, .classify = rsvp_classify, @@ -733,6 +741,7 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = { .delete = rsvp_delete, .walk = rsvp_walk, .dump = rsvp_dump, + .bind_class = rsvp_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index fb281b9b2c52..14a7e08b2fa9 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -606,6 +606,14 @@ nla_put_failure: return -1; } +static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct tcindex_filter_result *r = fh; + + if (r && r->res.classid == classid) + r->res.class = cl; +} + static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .kind = "tcindex", .classify = tcindex_classify, @@ -616,6 +624,7 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .delete = tcindex_delete, .walk = tcindex_walk, .dump = tcindex_dump, + .bind_class = tcindex_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 99ea4c74dd5b..10b8d851fc6b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1112,6 +1112,14 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } +static void u32_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct tc_u_knode *n = fh; + + if (n && n->res.classid == classid) + n->res.class = cl; +} + static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { @@ -1242,6 +1250,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { .delete = u32_delete, .walk = u32_walk, .dump = u32_dump, + .bind_class = u32_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e7f8e4bfd4ec..929b024f41ba 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -35,6 +35,7 @@ #include <net/sock.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* @@ -1648,6 +1649,64 @@ static int tclass_del_notify(struct net *net, n->nlmsg_flags & NLM_F_ECHO); } +#ifdef CONFIG_NET_CLS + +struct tcf_bind_args { + struct tcf_walker w; + u32 classid; + unsigned long cl; +}; + +static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) +{ + struct tcf_bind_args *a = (void *)arg; + + if (tp->ops->bind_class) { + tcf_tree_lock(tp); + tp->ops->bind_class(n, a->classid, a->cl); + tcf_tree_unlock(tp); + } + return 0; +} + +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, + unsigned long new_cl) +{ + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + struct tcf_block *block; + struct tcf_chain *chain; + unsigned long cl; + + cl = cops->find(q, portid); + if (!cl) + return; + block = cops->tcf_block(q, cl); + if (!block) + return; + list_for_each_entry(chain, &block->chain_list, list) { + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) { + struct tcf_bind_args arg = {}; + + arg.w.fn = tcf_node_bind; + arg.classid = clid; + arg.cl = new_cl; + tp->ops->walk(tp, &arg.w); + } + } +} + +#else + +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, + unsigned long new_cl) +{ +} + +#endif + static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1753,6 +1812,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, break; case RTM_DELTCLASS: err = tclass_del_notify(net, cops, skb, n, q, cl); + /* Unbind the class with flilters with 0 */ + tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); @@ -1767,9 +1828,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, err = -EOPNOTSUPP; if (cops->change) err = cops->change(q, clid, portid, tca, &new_cl); - if (err == 0) + if (err == 0) { tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); - + /* We just create a new class, need to do reverse binding. */ + if (cl != new_cl) + tc_bind_tclass(q, portid, clid, new_cl); + } out: return err; } diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c index 533dd11a6baa..05dcdf8a4baa 100644 --- a/samples/bpf/sock_flags_kern.c +++ b/samples/bpf/sock_flags_kern.c @@ -9,8 +9,13 @@ SEC("cgroup/sock1") int bpf_prog1(struct bpf_sock *sk) { char fmt[] = "socket: family %d type %d protocol %d\n"; + char fmt2[] = "socket: uid %u gid %u\n"; + __u64 gid_uid = bpf_get_current_uid_gid(); + __u32 uid = gid_uid & 0xffffffff; + __u32 gid = gid_uid >> 32; bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); + bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid); /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets * ie., make ping6 fail diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index c3cfb23e23b5..e79594dd629b 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -19,68 +19,271 @@ #include <errno.h> #include <fcntl.h> #include <net/if.h> +#include <inttypes.h> #include <linux/bpf.h> #include "libbpf.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; -static int prog_load(int idx) +static int prog_load(__u32 idx, __u32 mark, __u32 prio) { - struct bpf_insn prog[] = { + /* save pointer to context */ + struct bpf_insn prog_start[] = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + }; + struct bpf_insn prog_end[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + + /* set sk_bound_dev_if on socket */ + struct bpf_insn prog_dev[] = { BPF_MOV64_IMM(BPF_REG_3, idx), BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), - BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ - BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, + /* set mark on socket */ + struct bpf_insn prog_mark[] = { + /* get uid of process */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_current_uid_gid), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), + + /* if uid is 0, use given mark, else use the uid as the mark */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, mark), + + /* set the mark on the new socket */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)), + }; + + /* set priority on socket */ + struct bpf_insn prog_prio[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_3, prio), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)), + }; + + struct bpf_insn *prog; + size_t insns_cnt; + void *p; + int ret; + + insns_cnt = sizeof(prog_start) + sizeof(prog_end); + if (idx) + insns_cnt += sizeof(prog_dev); + + if (mark) + insns_cnt += sizeof(prog_mark); + + if (prio) + insns_cnt += sizeof(prog_prio); + + p = prog = malloc(insns_cnt); + if (!prog) { + fprintf(stderr, "Failed to allocate memory for instructions\n"); + return EXIT_FAILURE; + } + + memcpy(p, prog_start, sizeof(prog_start)); + p += sizeof(prog_start); + + if (idx) { + memcpy(p, prog_dev, sizeof(prog_dev)); + p += sizeof(prog_dev); + } + + if (mark) { + memcpy(p, prog_mark, sizeof(prog_mark)); + p += sizeof(prog_mark); + } + + if (prio) { + memcpy(p, prog_prio, sizeof(prog_prio)); + p += sizeof(prog_prio); + } + + memcpy(p, prog_end, sizeof(prog_end)); + p += sizeof(prog_end); + + insns_cnt /= sizeof(struct bpf_insn); + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); + + free(prog); + + return ret; +} + +static int get_bind_to_device(int sd, char *name, size_t len) +{ + socklen_t optlen = len; + int rc; + + name[0] = '\0'; + rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); + if (rc < 0) + perror("setsockopt(SO_BINDTODEVICE)"); + + return rc; +} + +static unsigned int get_somark(int sd) +{ + unsigned int mark = 0; + socklen_t optlen = sizeof(mark); + int rc; + + rc = getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen); + if (rc < 0) + perror("getsockopt(SO_MARK)"); + + return mark; +} + +static unsigned int get_priority(int sd) +{ + unsigned int prio = 0; + socklen_t optlen = sizeof(prio); + int rc; + + rc = getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, &optlen); + if (rc < 0) + perror("getsockopt(SO_PRIORITY)"); + + return prio; +} + +static int show_sockopts(int family) +{ + unsigned int mark, prio; + char name[16]; + int sd; + + sd = socket(family, SOCK_DGRAM, 17); + if (sd < 0) { + perror("socket"); + return 1; + } + + if (get_bind_to_device(sd, name, sizeof(name)) < 0) + return 1; + + mark = get_somark(sd); + prio = get_priority(sd); + + close(sd); + + printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio); + + return 0; } static int usage(const char *argv0) { - printf("Usage: %s cg-path device-index\n", argv0); + printf("Usage:\n"); + printf(" Attach a program\n"); + printf(" %s -b bind-to-dev -m mark -p prio cg-path\n", argv0); + printf("\n"); + printf(" Detach a program\n"); + printf(" %s -d cg-path\n", argv0); + printf("\n"); + printf(" Show inherited socket settings (mark, priority, and device)\n"); + printf(" %s [-6]\n", argv0); return EXIT_FAILURE; } int main(int argc, char **argv) { + __u32 idx = 0, mark = 0, prio = 0; + const char *cgrp_path = NULL; int cg_fd, prog_fd, ret; - unsigned int idx; + int family = PF_INET; + int do_attach = 1; + int rc; + + while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) { + switch (rc) { + case 'd': + do_attach = 0; + break; + case 'b': + idx = if_nametoindex(optarg); + if (!idx) { + idx = strtoumax(optarg, NULL, 0); + if (!idx) { + printf("Invalid device name\n"); + return EXIT_FAILURE; + } + } + break; + case 'm': + mark = strtoumax(optarg, NULL, 0); + break; + case 'p': + prio = strtoumax(optarg, NULL, 0); + break; + case '6': + family = PF_INET6; + break; + default: + return usage(argv[0]); + } + } - if (argc < 2) - return usage(argv[0]); + if (optind == argc) + return show_sockopts(family); - idx = if_nametoindex(argv[2]); - if (!idx) { - printf("Invalid device name\n"); + cgrp_path = argv[optind]; + if (!cgrp_path) { + fprintf(stderr, "cgroup path not given\n"); return EXIT_FAILURE; } - cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); - if (cg_fd < 0) { - printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + if (do_attach && !idx && !mark && !prio) { + fprintf(stderr, + "One of device, mark or priority must be given\n"); return EXIT_FAILURE; } - prog_fd = prog_load(idx); - printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); - - if (prog_fd < 0) { - printf("Failed to load prog: '%s'\n", strerror(errno)); + cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); - if (ret < 0) { - printf("Failed to attach prog to cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; + if (do_attach) { + prog_fd = prog_load(idx, mark, prio); + if (prog_fd < 0) { + printf("Failed to load prog: '%s'\n", strerror(errno)); + printf("Output from kernel verifier:\n%s\n-------\n", + bpf_log_buf); + return EXIT_FAILURE; + } + + ret = bpf_prog_attach(prog_fd, cg_fd, + BPF_CGROUP_INET_SOCK_CREATE, 0); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + } else { + ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + if (ret < 0) { + printf("Failed to detach prog from cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } } + close(cg_fd); return EXIT_SUCCESS; } diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh index 925fd467c7cc..a81f38eef417 100755 --- a/samples/bpf/test_cgrp2_sock.sh +++ b/samples/bpf/test_cgrp2_sock.sh @@ -1,47 +1,133 @@ -#!/bin/bash - -function config_device { - ip netns add at_ns0 - ip link add veth0 type veth peer name veth0b - ip link set veth0b up - ip link set veth0 netns at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad - ip netns exec at_ns0 ip link set dev veth0 up - ip link add foo type vrf table 1234 - ip link set foo up - ip addr add 172.16.1.101/24 dev veth0b - ip addr add 2401:db00::2/64 dev veth0b nodad - ip link set veth0b master foo +#!/bin/sh + +# Test various socket options that can be set by attaching programs to cgroups. + +CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock" + +################################################################################ +# +print_result() +{ + local rc=$1 + local status=" OK " + + [ $rc -ne 0 ] && status="FAIL" + + printf "%-50s [%4s]\n" "$2" "$status" } -function attach_bpf { - rm -rf /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2 - mount -t cgroup2 none /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2/foo - test_cgrp2_sock /tmp/cgroupv2/foo foo - echo $$ >> /tmp/cgroupv2/foo/cgroup.procs +check_sock() +{ + out=$(test_cgrp2_sock) + echo $out | grep -q "$1" + if [ $? -ne 0 ]; then + print_result 1 "IPv4: $2" + echo " expected: $1" + echo " have: $out" + rc=1 + else + print_result 0 "IPv4: $2" + fi } -function cleanup { - set +ex - ip netns delete at_ns0 - ip link del veth0 - ip link del foo - umount /tmp/cgroupv2 - rm -rf /tmp/cgroupv2 - set -ex +check_sock6() +{ + out=$(test_cgrp2_sock -6) + echo $out | grep -q "$1" + if [ $? -ne 0 ]; then + print_result 1 "IPv6: $2" + echo " expected: $1" + echo " have: $out" + rc=1 + else + print_result 0 "IPv6: $2" + fi } -function do_test { - ping -c1 -w1 172.16.1.100 - ping6 -c1 -w1 2401:db00::1 +################################################################################ +# + +cleanup() +{ + echo $$ >> ${CGRP_MNT}/cgroup.procs + rmdir ${CGRP_MNT}/sockopts } +cleanup_and_exit() +{ + local rc=$1 + local msg="$2" + + [ -n "$msg" ] && echo "ERROR: $msg" + + ip li del cgrp2_sock + umount ${CGRP_MNT} + + exit $rc +} + + +################################################################################ +# main + +rc=0 + +ip li add cgrp2_sock type dummy 2>/dev/null + +set -e +mkdir -p ${CGRP_MNT} +mount -t cgroup2 none ${CGRP_MNT} +set +e + + +# make sure we have a known start point cleanup 2>/dev/null -config_device -attach_bpf -do_test -cleanup -echo "*** PASS ***" + +mkdir -p ${CGRP_MNT}/sockopts +[ $? -ne 0 ] && cleanup_and_exit 1 "Failed to create cgroup hierarchy" + + +# set pid into cgroup +echo $$ > ${CGRP_MNT}/sockopts/cgroup.procs + +# no bpf program attached, so socket should show no settings +check_sock "dev , mark 0, priority 0" "No programs attached" +check_sock6 "dev , mark 0, priority 0" "No programs attached" + +# verify device is set +# +test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set device" +fi +check_sock "dev cgrp2_sock, mark 0, priority 0" "Device set" +check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set" + +# verify mark is set +# +test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set mark" +fi +check_sock "dev , mark 666, priority 0" "Mark set" +check_sock6 "dev , mark 666, priority 0" "Mark set" + +# verify priority is set +# +test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set priority" +fi +check_sock "dev , mark 0, priority 123" "Priority set" +check_sock6 "dev , mark 0, priority 123" "Priority set" + +# all 3 at once +# +test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set device, mark and priority" +fi +check_sock "dev cgrp2_sock, mark 666, priority 123" "Priority set" +check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Priority set" + +cleanup_and_exit $rc diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c index 1c90288d0203..8abb151e385f 100644 --- a/samples/bpf/xdp_redirect_kern.c +++ b/samples/bpf/xdp_redirect_kern.c @@ -82,7 +82,7 @@ int xdp_redirect_prog(struct xdp_md *ctx) /* Redirect require an XDP bpf_prog loaded on the TX device */ SEC("xdp_redirect_dummy") -int xdp_redirect_dummy(struct xdp_md *ctx) +int xdp_redirect_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; } diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c index 79795d41ad0d..740a529ba84f 100644 --- a/samples/bpf/xdp_redirect_map_kern.c +++ b/samples/bpf/xdp_redirect_map_kern.c @@ -84,7 +84,7 @@ int xdp_redirect_map_prog(struct xdp_md *ctx) /* Redirect require an XDP bpf_prog loaded on the TX device */ SEC("xdp_redirect_dummy") -int xdp_redirect_dummy(struct xdp_md *ctx) +int xdp_redirect_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; } |