summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
diff options
context:
space:
mode:
authorVlad Buslov <vladbu@nvidia.com>2021-01-24 22:07:04 +0200
committerSaeed Mahameed <saeedm@nvidia.com>2021-02-05 20:53:37 -0800
commit0d9f96471493d5483d116c137693f03604332a04 (patch)
tree9b93b6d1d88b3803512a534146dec26e41d790d4 /drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
parent8e404fefa58b6138531e3d4b5647ee79f75ae9a8 (diff)
net/mlx5e: Extract tc tunnel encap/decap code to dedicated file
Following patches in series extend the extracted code with routing infrastructure. To improve code modularity created a dedicated tc_tun_encap.c source file and move encap/decap related code to the new file. Export code that is used by both regular TC code and encap/decap code into tc_priv.h (new header intended to be used only by TC module). Rename some exported functions by adding "mlx5e_" prefix to their names. Signed-off-by: Vlad Buslov <vladbu@nvidia.com> Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com> Reviewed-by: Roi Dayan <roid@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c734
1 files changed, 734 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
new file mode 100644
index 000000000000..63652911d56e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include "tc_tun_encap.h"
+#include "en_tc.h"
+#include "tc_tun.h"
+#include "rep/tc.h"
+#include "diag/en_tc_tracepoint.h"
+
+int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec)
+{
+ struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
+ struct mlx5_rx_tun_attr *tun_attr;
+ void *daddr, *saddr;
+ u8 ip_version;
+
+ tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
+ if (!tun_attr)
+ return -ENOMEM;
+
+ esw_attr->rx_tun_attr = tun_attr;
+ ip_version = mlx5e_tc_get_ip_version(spec, true);
+
+ if (ip_version == 4) {
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ tun_attr->dst_ip.v4 = *(__be32 *)daddr;
+ tun_attr->src_ip.v4 = *(__be32 *)saddr;
+ }
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ else if (ip_version == 6) {
+ int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+
+ daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+ saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
+ memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
+ }
+#endif
+ return 0;
+}
+
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ e->reformat_type,
+ e->encap_size, e->encap_header,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(e->pkt_reformat)) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
+ PTR_ERR(e->pkt_reformat));
+ return;
+ }
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ bool all_flow_encaps_valid = true;
+ int i;
+
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
+
+ esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
+ esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ /* Flow can be associated with multiple encap entries.
+ * Before offloading the flow verify that all of them have
+ * a valid neighbour.
+ */
+ for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+ if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
+ all_flow_encaps_valid = false;
+ break;
+ }
+ }
+ /* Do not offload flows with unresolved neighbors */
+ if (!all_flow_encaps_valid)
+ continue;
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_from_slow_path(esw, flow);
+ flow->rule[0] = rule;
+ /* was unset when slow path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+}
+
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct list_head *flow_list)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_spec *spec;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ list_for_each_entry(flow, flow_list, tmp_list) {
+ if (!mlx5e_is_offloaded_flow(flow))
+ continue;
+ attr = flow->attr;
+ esw_attr = attr->esw_attr;
+ spec = &attr->parse_attr->spec;
+
+ /* update from encap rule to slow path rule */
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+ /* mark the flow's encap dest as non-valid */
+ esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ continue;
+ }
+
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
+ flow->rule[0] = rule;
+ /* was unset when fast path rule removed */
+ flow_flag_set(flow, OFFLOADED);
+ }
+
+ /* we know that the encap is valid */
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+}
+
+/* Takes reference to all flows attached to encap and adds the flows to
+ * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
+ */
+void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
+{
+ struct encap_flow_item *efi;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(efi, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+ wait_for_completion(&flow->init_done);
+
+ flow->tmp_efi_index = efi->index;
+ list_add(&flow->tmp_list, flow_list);
+ }
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_encap_entry *next = NULL;
+
+retry:
+ rcu_read_lock();
+
+ /* find encap with non-zero reference counter value */
+ for (next = e ?
+ list_next_or_null_rcu(&nhe->encap_list,
+ &e->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list) :
+ list_first_or_null_rcu(&nhe->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list);
+ next;
+ next = list_next_or_null_rcu(&nhe->encap_list,
+ &next->encap_list,
+ struct mlx5e_encap_entry,
+ encap_list))
+ if (mlx5e_encap_take(next))
+ break;
+
+ rcu_read_unlock();
+
+ /* release starting encap */
+ if (e)
+ mlx5e_encap_put(netdev_priv(e->out_dev), e);
+ if (!next)
+ return next;
+
+ /* wait for encap to be fully initialized */
+ wait_for_completion(&next->res_ready);
+ /* continue searching if encap entry is not in valid state after completion */
+ if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ e = next;
+ goto retry;
+ }
+
+ return next;
+}
+
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+ struct mlx5e_encap_entry *e = NULL;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ struct neigh_table *tbl;
+ bool neigh_used = false;
+ struct neighbour *n;
+ u64 lastuse;
+
+ if (m_neigh->family == AF_INET)
+ tbl = &arp_tbl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (m_neigh->family == AF_INET6)
+ tbl = ipv6_stub->nd_tbl;
+#endif
+ else
+ return;
+
+ /* mlx5e_get_next_valid_encap() releases previous encap before returning
+ * next one.
+ */
+ while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
+ struct mlx5e_priv *priv = netdev_priv(e->out_dev);
+ struct encap_flow_item *efi, *tmp;
+ struct mlx5_eswitch *esw;
+ LIST_HEAD(flow_list);
+
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_for_each_entry_safe(efi, tmp, &e->flows, list) {
+ flow = container_of(efi, struct mlx5e_tc_flow,
+ encaps[efi->index]);
+ if (IS_ERR(mlx5e_flow_get(flow)))
+ continue;
+ list_add(&flow->tmp_list, &flow_list);
+
+ if (mlx5e_is_offloaded_flow(flow)) {
+ counter = mlx5e_tc_get_counter(flow);
+ lastuse = mlx5_fc_query_lastuse(counter);
+ if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ neigh_used = true;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_put_encap_flow_list(priv, &flow_list);
+ if (neigh_used) {
+ /* release current encap before breaking the loop */
+ mlx5e_encap_put(priv, e);
+ break;
+ }
+ }
+
+ trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
+
+ if (neigh_used) {
+ nhe->reported_lastuse = jiffies;
+
+ /* find the relevant neigh according to the cached device and
+ * dst ip pair
+ */
+ n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
+ if (!n)
+ return;
+
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ WARN_ON(!list_empty(&e->flows));
+
+ if (e->compl_result > 0) {
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
+ }
+
+ kfree(e->tun_info);
+ kfree(e->encap_header);
+ kfree_rcu(e, rcu);
+}
+
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+ struct mlx5e_decap_entry *d)
+{
+ WARN_ON(!list_empty(&d->flows));
+
+ if (!d->compl_result)
+ mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+ kfree_rcu(d, rcu);
+}
+
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+ return;
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+ return;
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow, int out_index)
+{
+ struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ /* flow wasn't fully initialized */
+ if (!e)
+ return;
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ list_del(&flow->encaps[out_index].list);
+ flow->encaps[out_index].e = NULL;
+ if (!refcount_dec_and_test(&e->refcnt)) {
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&e->encap_hlist);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ mlx5e_encap_dealloc(priv, e);
+}
+
+void mlx5e_detach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+ if (!d)
+ return;
+
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ list_del(&flow->l3_to_l2_reformat);
+ flow->decap_reformat = NULL;
+
+ if (!refcount_dec_and_test(&d->refcnt)) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return;
+ }
+ hash_del_rcu(&d->hlist);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ mlx5e_decap_dealloc(priv, d);
+}
+
+struct encap_key {
+ const struct ip_tunnel_key *ip_tun_key;
+ struct mlx5e_tc_tunnel *tc_tunnel;
+};
+
+static int cmp_encap_info(struct encap_key *a,
+ struct encap_key *b)
+{
+ return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
+ a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
+}
+
+static int cmp_decap_info(struct mlx5e_decap_key *a,
+ struct mlx5e_decap_key *b)
+{
+ return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
+static int hash_encap_info(struct encap_key *key)
+{
+ return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
+ key->tc_tunnel->tunnel_type);
+}
+
+static int hash_decap_info(struct mlx5e_decap_key *key)
+{
+ return jhash(&key->key, sizeof(key->key), 0);
+}
+
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+ return refcount_inc_not_zero(&e->refcnt);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_encap_entry *e;
+ struct encap_key e_key;
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ e_key.ip_tun_key = &e->tun_info->key;
+ e_key.tc_tunnel = e->tunnel;
+ if (!cmp_encap_info(&e_key, key) &&
+ mlx5e_encap_take(e))
+ return e;
+ }
+
+ return NULL;
+}
+
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+ uintptr_t hash_key)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_decap_key r_key;
+ struct mlx5e_decap_entry *e;
+
+ hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+ hlist, hash_key) {
+ r_key = e->key;
+ if (!cmp_decap_info(&r_key, key) &&
+ mlx5e_decap_take(e))
+ return e;
+ }
+ return NULL;
+}
+
+struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < out_index; i++) {
+ if (flow->encaps[i].e != e)
+ continue;
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
+ return true;
+ }
+
+ return false;
+}
+
+static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
+ struct mlx5_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+ struct net_device *out_dev,
+ int route_dev_ifindex,
+ int out_index)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *route_dev;
+ u16 vport_num;
+ int err = 0;
+ u32 data;
+
+ route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
+
+ if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
+ !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
+ goto out;
+
+ err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
+ if (err)
+ goto out;
+
+ attr->dest_chain = 0;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+ data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
+ vport_num);
+ err = mlx5e_tc_match_to_reg_set(esw->dev, mod_hdr_acts,
+ MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, data);
+ if (err)
+ goto out;
+
+out:
+ if (route_dev)
+ dev_put(route_dev);
+ return err;
+}
+
+int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct net_device *mirred_dev,
+ int out_index,
+ struct netlink_ext_ack *extack,
+ struct net_device **encap_dev,
+ bool *encap_valid)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_flow_attr *attr = flow->attr;
+ const struct ip_tunnel_info *tun_info;
+ struct encap_key key;
+ struct mlx5e_encap_entry *e;
+ unsigned short family;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = attr->parse_attr;
+ tun_info = parse_attr->tun_info[out_index];
+ family = ip_tunnel_info_af(tun_info);
+ key.ip_tun_key = &tun_info->key;
+ key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
+ if (!key.tc_tunnel) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
+ return -EOPNOTSUPP;
+ }
+
+ hash_key = hash_encap_info(&key);
+
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ e = mlx5e_encap_get(priv, &key, hash_key);
+
+ /* must verify if encap is valid or not */
+ if (e) {
+ /* Check that entry was not already attached to this flow */
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ wait_for_completion(&e->res_ready);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ if (e->compl_result < 0) {
+ err = -EREMOTEIO;
+ goto out_err;
+ }
+ goto attach_flow;
+ }
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ refcount_set(&e->refcnt, 1);
+ init_completion(&e->res_ready);
+
+ tun_info = mlx5e_dup_tun_info(tun_info);
+ if (!tun_info) {
+ err = -ENOMEM;
+ goto out_err_init;
+ }
+ e->tun_info = tun_info;
+ err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
+ if (err)
+ goto out_err_init;
+
+ INIT_LIST_HEAD(&e->flows);
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ if (family == AF_INET)
+ err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
+ else if (family == AF_INET6)
+ err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
+
+ /* Protect against concurrent neigh update. */
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ complete_all(&e->res_ready);
+ if (err) {
+ e->compl_result = err;
+ goto out_err;
+ }
+ e->compl_result = 1;
+
+attach_flow:
+ err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
+ e->route_dev_ifindex, out_index);
+ if (err)
+ goto out_err;
+
+ flow->encaps[out_index].e = e;
+ list_add(&flow->encaps[out_index].list, &e->flows);
+ flow->encaps[out_index].index = out_index;
+ *encap_dev = e->out_dev;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
+ attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
+ *encap_valid = true;
+ } else {
+ *encap_valid = false;
+ }
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ if (e)
+ mlx5e_encap_put(priv, e);
+ return err;
+
+out_err_init:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ kfree(tun_info);
+ kfree(e);
+ return err;
+}
+
+int mlx5e_attach_decap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_decap_entry *d;
+ struct mlx5e_decap_key key;
+ uintptr_t hash_key;
+ int err = 0;
+
+ parse_attr = flow->attr->parse_attr;
+ if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "encap header larger than max supported");
+ return -EOPNOTSUPP;
+ }
+
+ key.key = parse_attr->eth;
+ hash_key = hash_decap_info(&key);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ d = mlx5e_decap_get(priv, &key, hash_key);
+ if (d) {
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ wait_for_completion(&d->res_ready);
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ if (d->compl_result) {
+ err = -EREMOTEIO;
+ goto out_free;
+ }
+ goto found;
+ }
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ d->key = key;
+ refcount_set(&d->refcnt, 1);
+ init_completion(&d->res_ready);
+ INIT_LIST_HEAD(&d->flows);
+ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+ d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
+ sizeof(parse_attr->eth),
+ &parse_attr->eth,
+ MLX5_FLOW_NAMESPACE_FDB);
+ if (IS_ERR(d->pkt_reformat)) {
+ err = PTR_ERR(d->pkt_reformat);
+ d->compl_result = err;
+ }
+ mutex_lock(&esw->offloads.decap_tbl_lock);
+ complete_all(&d->res_ready);
+ if (err)
+ goto out_free;
+
+found:
+ flow->decap_reformat = d;
+ attr->decap_pkt_reformat = d->pkt_reformat;
+ list_add(&flow->l3_to_l2_reformat, &d->flows);
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return 0;
+
+out_free:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ mlx5e_decap_put(priv, d);
+ return err;
+
+out_err:
+ mutex_unlock(&esw->offloads.decap_tbl_lock);
+ return err;
+}