From af6dabc9c70ae3f307685b1f32f52d60b1bf0527 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 19 Dec 2014 11:09:13 +0800 Subject: net: drop the packet when fails to do software segmentation or header check Commit cecda693a969816bac5e470e1d9c9c0ef5567bca ("net: keep original skb which only needs header checking during software GSO") keeps the original skb for packets that only needs header check, but it doesn't drop the packet if software segmentation or header check were failed. Fixes cecda693a9 ("net: keep original skb which only needs header checking during software GSO") Cc: Eric Dumazet Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index f411c28d0a66..a989f8502412 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2673,7 +2673,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device segs = skb_gso_segment(skb, features); if (IS_ERR(segs)) { - segs = NULL; + goto out_kfree_skb; } else if (segs) { consume_skb(skb); skb = segs; -- cgit v1.2.3 From 726ce70e9e4050409243f3a1d735dc86bc6e6e57 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Dec 2014 07:16:21 +1100 Subject: net: Move napi polling code out of net_rx_action This patch creates a new function napi_poll and moves the napi polling code from net_rx_action into it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 98 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 44 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a989f8502412..493ae8ee569f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4557,6 +4557,59 @@ void netif_napi_del(struct napi_struct *napi) } EXPORT_SYMBOL(netif_napi_del); +static int napi_poll(struct napi_struct *n, struct list_head *repoll) +{ + void *have; + int work, weight; + + list_del_init(&n->poll_list); + + have = netpoll_poll_lock(n); + + weight = n->weight; + + /* This NAPI_STATE_SCHED test is for avoiding a race + * with netpoll's poll_napi(). Only the entity which + * obtains the lock and sees NAPI_STATE_SCHED set will + * actually make the ->poll() call. Therefore we avoid + * accidentally calling ->poll() when NAPI is not scheduled. + */ + work = 0; + if (test_bit(NAPI_STATE_SCHED, &n->state)) { + work = n->poll(n, weight); + trace_napi_poll(n); + } + + WARN_ON_ONCE(work > weight); + + if (likely(work < weight)) + goto out_unlock; + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (unlikely(napi_disable_pending(n))) { + napi_complete(n); + goto out_unlock; + } + + if (n->gro_list) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + napi_gro_flush(n, HZ >= 1000); + } + + list_add_tail(&n->poll_list, repoll); + +out_unlock: + netpoll_poll_unlock(have); + + return work; +} + static void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -4564,7 +4617,6 @@ static void net_rx_action(struct softirq_action *h) int budget = netdev_budget; LIST_HEAD(list); LIST_HEAD(repoll); - void *have; local_irq_disable(); list_splice_init(&sd->poll_list, &list); @@ -4572,7 +4624,6 @@ static void net_rx_action(struct softirq_action *h) while (!list_empty(&list)) { struct napi_struct *n; - int work, weight; /* If softirq window is exhausted then punt. * Allow this to run for 2 jiffies since which will allow @@ -4583,48 +4634,7 @@ static void net_rx_action(struct softirq_action *h) n = list_first_entry(&list, struct napi_struct, poll_list); - list_del_init(&n->poll_list); - - have = netpoll_poll_lock(n); - - weight = n->weight; - - /* This NAPI_STATE_SCHED test is for avoiding a race - * with netpoll's poll_napi(). Only the entity which - * obtains the lock and sees NAPI_STATE_SCHED set will - * actually make the ->poll() call. Therefore we avoid - * accidentally calling ->poll() when NAPI is not scheduled. - */ - work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) { - work = n->poll(n, weight); - trace_napi_poll(n); - } - - WARN_ON_ONCE(work > weight); - - budget -= work; - - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code - * still "owns" the NAPI instance and therefore can - * move the instance around on the list at-will. - */ - if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) { - napi_complete(n); - } else { - if (n->gro_list) { - /* flush too old packets - * If HZ < 1000, flush all packets. - */ - napi_gro_flush(n, HZ >= 1000); - } - list_add_tail(&n->poll_list, &repoll); - } - } - - netpoll_poll_unlock(have); + budget -= napi_poll(n, &repoll); } if (!sd_has_rps_ipi_waiting(sd) && -- cgit v1.2.3 From 001ce546bb537bb5b7821f05633556a0c9787e32 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Dec 2014 07:16:22 +1100 Subject: net: Detect drivers that reschedule NAPI and exhaust budget The commit d75b1ade567ffab085e8adbbdacf0092d10cd09c (net: less interrupt masking in NAPI) required drivers to leave poll_list empty if the entire budget is consumed. We have already had two broken drivers so let's add a check for this. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 493ae8ee569f..c0cf1293df06 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4602,6 +4602,15 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) napi_gro_flush(n, HZ >= 1000); } + /* Some drivers may have called napi_schedule + * prior to exhausting their budget. + */ + if (unlikely(!list_empty(&n->poll_list))) { + pr_warn_once("%s: Budget exhausted after napi rescheduled\n", + n->dev ? n->dev->name : "backlog"); + goto out_unlock; + } + list_add_tail(&n->poll_list, repoll); out_unlock: -- cgit v1.2.3 From 6bd373ebbac4b13ecd39ddc37a0dc5ad4c5e4585 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Dec 2014 07:16:24 +1100 Subject: net: Always poll at least one device in net_rx_action We should only perform the softnet_break check after we have polled at least one device in net_rx_action. Otherwise a zero or negative setting of netdev_budget can lock up the whole system. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c0cf1293df06..b85eba9dfd88 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4634,16 +4634,15 @@ static void net_rx_action(struct softirq_action *h) while (!list_empty(&list)) { struct napi_struct *n; + n = list_first_entry(&list, struct napi_struct, poll_list); + budget -= napi_poll(n, &repoll); + /* If softirq window is exhausted then punt. * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. */ if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) goto softnet_break; - - - n = list_first_entry(&list, struct napi_struct, poll_list); - budget -= napi_poll(n, &repoll); } if (!sd_has_rps_ipi_waiting(sd) && -- cgit v1.2.3 From ceb8d5bf17d366534f32d2f60f41d905a5bc864b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Dec 2014 07:16:25 +1100 Subject: net: Rearrange loop in net_rx_action This patch rearranges the loop in net_rx_action to reduce the amount of jumping back and forth when reading the code. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b85eba9dfd88..c97ae6fec040 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4631,9 +4631,15 @@ static void net_rx_action(struct softirq_action *h) list_splice_init(&sd->poll_list, &list); local_irq_enable(); - while (!list_empty(&list)) { + for (;;) { struct napi_struct *n; + if (list_empty(&list)) { + if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) + return; + break; + } + n = list_first_entry(&list, struct napi_struct, poll_list); budget -= napi_poll(n, &repoll); @@ -4641,15 +4647,13 @@ static void net_rx_action(struct softirq_action *h) * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. */ - if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) - goto softnet_break; + if (unlikely(budget <= 0 || + time_after_eq(jiffies, time_limit))) { + sd->time_squeeze++; + break; + } } - if (!sd_has_rps_ipi_waiting(sd) && - list_empty(&list) && - list_empty(&repoll)) - return; -out: local_irq_disable(); list_splice_tail_init(&sd->poll_list, &list); @@ -4659,12 +4663,6 @@ out: __raise_softirq_irqoff(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd); - - return; - -softnet_break: - sd->time_squeeze++; - goto out; } struct netdev_adjacent { -- cgit v1.2.3 From d0edc7bf397a5e0f312bf8a1e87cfee0019dc07b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 23 Dec 2014 16:20:11 -0800 Subject: mpls: Fix config check for mpls. Fixes MPLS GSO for case when mpls is compiled as kernel module. Fixes: 0d89d2035f ("MPLS: Add limited GSO support"). Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c97ae6fec040..67b6210a589a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2522,7 +2522,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) /* If MPLS offload request, verify we are testing hardware MPLS features * instead of standard features for the netdev. */ -#ifdef CONFIG_NET_MPLS_GSO +#if IS_ENABLED(CONFIG_NET_MPLS_GSO) static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) -- cgit v1.2.3 From 796f2da81bead71ffc91ef70912cd8d1827bf756 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Mon, 22 Dec 2014 19:04:14 +0900 Subject: net: Fix stacked vlan offload features computation When vlan tags are stacked, it is very likely that the outer tag is stored in skb->vlan_tci and skb->protocol shows the inner tag's vlan_proto. Currently netif_skb_features() first looks at skb->protocol even if there is the outer tag in vlan_tci, thus it incorrectly retrieves the protocol encapsulated by the inner vlan instead of the inner vlan protocol. This allows GSO packets to be passed to HW and they end up being corrupted. Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.") Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/core/dev.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 67b6210a589a..bd44e28c735e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2570,11 +2570,14 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, features); + if (!vlan_tx_tag_present(skb)) { + if (unlikely(protocol == htons(ETH_P_8021Q) || + protocol == htons(ETH_P_8021AD))) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else { + return harmonize_features(skb, features); + } } features = netdev_intersect_features(features, -- cgit v1.2.3 From 2c26d34bbcc0b3f30385d5587aa232289e2eed8e Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 19 Dec 2014 15:32:00 -0800 Subject: net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding When using VXLAN tunnels and a sky2 device, I have experienced checksum failures of the following type: [ 4297.761899] eth0: hw csum failure [...] [ 4297.765223] Call Trace: [ 4297.765224] [] dump_stack+0x46/0x58 [ 4297.765235] [] netdev_rx_csum_fault+0x42/0x50 [ 4297.765238] [] ? skb_push+0x40/0x40 [ 4297.765240] [] __skb_checksum_complete+0xbc/0xd0 [ 4297.765243] [] tcp_v4_rcv+0x2e2/0x950 [ 4297.765246] [] ? ip_rcv_finish+0x360/0x360 These are reliably reproduced in a network topology of: container:eth0 == host(OVS VXLAN on VLAN) == bond0 == eth0 (sky2) -> switch When VXLAN encapsulated traffic is received from a similarly configured peer, the above warning is generated in the receive processing of the encapsulated packet. Note that the warning is associated with the container eth0. The skbs from sky2 have ip_summed set to CHECKSUM_COMPLETE, and because the packet is an encapsulated Ethernet frame, the checksum generated by the hardware includes the inner protocol and Ethernet headers. The receive code is careful to update the skb->csum, except in __dev_forward_skb, as called by dev_forward_skb. __dev_forward_skb calls eth_type_trans, which in turn calls skb_pull_inline(skb, ETH_HLEN) to skip over the Ethernet header, but does not update skb->csum when doing so. This patch resolves the problem by adding a call to skb_postpull_rcsum to update the skb->csum after the call to eth_type_trans. Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index bd44e28c735e..0094562b732a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1694,6 +1694,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_scrub_packet(skb, true); skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); return 0; } -- cgit v1.2.3 From 5f35227ea34bb616c436d9da47fc325866c428f3 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 23 Dec 2014 22:37:26 -0800 Subject: net: Generalize ndo_gso_check to ndo_features_check GSO isn't the only offload feature with restrictions that potentially can't be expressed with the current features mechanism. Checksum is another although it's a general issue that could in theory apply to anything. Even if it may be possible to implement these restrictions in other ways, it can result in duplicate code or inefficient per-packet behavior. This generalizes ndo_gso_check so that drivers can remove any features that don't make sense for a given packet, similar to netif_skb_features(). It also converts existing driver restrictions to the new format, completing the work that was done to support tunnel protocols since the issues apply to checksums as well. By actually removing features from the set that are used to do offloading, it solves another problem with the existing interface. In these cases, GSO would run with the original set of features and not do anything because it appears that segmentation is not required. CC: Tom Herbert CC: Joe Stringer CC: Eric Dumazet CC: Hayes Wang Signed-off-by: Jesse Gross Acked-by: Tom Herbert Fixes: 04ffcb255f22 ("net: Add ndo_gso_check") Tested-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 8 ++++--- drivers/net/ethernet/emulex/benet/be_main.c | 8 ++++--- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 10 +++++---- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 ++++--- include/linux/netdevice.h | 20 +++++++++-------- include/net/vxlan.h | 28 ++++++++++++++++++++---- net/core/dev.c | 23 +++++++++++-------- 7 files changed, 70 insertions(+), 35 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 9f5e38769a29..72eef9fc883e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12553,9 +12553,11 @@ static int bnx2x_get_phys_port_id(struct net_device *netdev, return 0; } -static bool bnx2x_gso_check(struct sk_buff *skb, struct net_device *dev) +static netdev_features_t bnx2x_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) { - return vxlan_gso_check(skb); + return vxlan_features_check(skb, features); } static const struct net_device_ops bnx2x_netdev_ops = { @@ -12589,7 +12591,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { #endif .ndo_get_phys_port_id = bnx2x_get_phys_port_id, .ndo_set_vf_link_state = bnx2x_set_vf_link_state, - .ndo_gso_check = bnx2x_gso_check, + .ndo_features_check = bnx2x_features_check, }; static int bnx2x_set_coherency_mask(struct bnx2x *bp) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 196073110e32..41a0a5498da7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4459,9 +4459,11 @@ done: adapter->vxlan_port_count--; } -static bool be_gso_check(struct sk_buff *skb, struct net_device *dev) +static netdev_features_t be_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) { - return vxlan_gso_check(skb); + return vxlan_features_check(skb, features); } #endif @@ -4492,7 +4494,7 @@ static const struct net_device_ops be_netdev_ops = { #ifdef CONFIG_BE2NET_VXLAN .ndo_add_vxlan_port = be_add_vxlan_port, .ndo_del_vxlan_port = be_del_vxlan_port, - .ndo_gso_check = be_gso_check, + .ndo_features_check = be_features_check, #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 190cbd931f6b..d0d6dc1b8e46 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2365,9 +2365,11 @@ static void mlx4_en_del_vxlan_port(struct net_device *dev, queue_work(priv->mdev->workqueue, &priv->vxlan_del_task); } -static bool mlx4_en_gso_check(struct sk_buff *skb, struct net_device *dev) +static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) { - return vxlan_gso_check(skb); + return vxlan_features_check(skb, features); } #endif @@ -2400,7 +2402,7 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_MLX4_EN_VXLAN .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, - .ndo_gso_check = mlx4_en_gso_check, + .ndo_features_check = mlx4_en_features_check, #endif }; @@ -2434,7 +2436,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { #ifdef CONFIG_MLX4_EN_VXLAN .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, - .ndo_gso_check = mlx4_en_gso_check, + .ndo_features_check = mlx4_en_features_check, #endif }; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 1aa25b13ace1..9929b97cfb36 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -505,9 +505,11 @@ static void qlcnic_del_vxlan_port(struct net_device *netdev, adapter->flags |= QLCNIC_DEL_VXLAN_PORT; } -static bool qlcnic_gso_check(struct sk_buff *skb, struct net_device *dev) +static netdev_features_t qlcnic_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) { - return vxlan_gso_check(skb); + return vxlan_features_check(skb, features); } #endif @@ -532,7 +534,7 @@ static const struct net_device_ops qlcnic_netdev_ops = { #ifdef CONFIG_QLCNIC_VXLAN .ndo_add_vxlan_port = qlcnic_add_vxlan_port, .ndo_del_vxlan_port = qlcnic_del_vxlan_port, - .ndo_gso_check = qlcnic_gso_check, + .ndo_features_check = qlcnic_features_check, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = qlcnic_poll_controller, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c31f74d76ebd..679e6e90aa4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1012,12 +1012,15 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. - * bool (*ndo_gso_check) (struct sk_buff *skb, - * struct net_device *dev); + * netdev_features_t (*ndo_features_check) (struct sk_buff *skb, + * struct net_device *dev + * netdev_features_t features); * Called by core transmit path to determine if device is capable of - * performing GSO on a packet. The device returns true if it is - * able to GSO the packet, false otherwise. If the return value is - * false the stack will do software GSO. + * performing offload operations on a given packet. This is to give + * the device an opportunity to implement any restrictions that cannot + * be otherwise expressed by feature flags. The check is called with + * the set of features that the stack has calculated and it returns + * those the driver believes to be appropriate. * * int (*ndo_switch_parent_id_get)(struct net_device *dev, * struct netdev_phys_item_id *psid); @@ -1178,8 +1181,9 @@ struct net_device_ops { struct net_device *dev, void *priv); int (*ndo_get_lock_subclass)(struct net_device *dev); - bool (*ndo_gso_check) (struct sk_buff *skb, - struct net_device *dev); + netdev_features_t (*ndo_features_check) (struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); #ifdef CONFIG_NET_SWITCHDEV int (*ndo_switch_parent_id_get)(struct net_device *dev, struct netdev_phys_item_id *psid); @@ -3611,8 +3615,6 @@ static inline bool netif_needs_gso(struct net_device *dev, struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || - (dev->netdev_ops->ndo_gso_check && - !dev->netdev_ops->ndo_gso_check(skb, dev)) || unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && (skb->ip_summed != CHECKSUM_UNNECESSARY))); } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 57cccd0052e5..903461aa5644 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -1,6 +1,9 @@ #ifndef __NET_VXLAN_H #define __NET_VXLAN_H 1 +#include +#include +#include #include #include #include @@ -51,16 +54,33 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); -static inline bool vxlan_gso_check(struct sk_buff *skb) +static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, + netdev_features_t features) { - if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) && + u8 l4_hdr = 0; + + if (!skb->encapsulation) + return features; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_hdr = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_hdr = ipv6_hdr(skb)->nexthdr; + break; + default: + return features;; + } + + if ((l4_hdr == IPPROTO_UDP) && (skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return false; + return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); - return true; + return features; } /* IP header + UDP + VXLAN + Ethernet header */ diff --git a/net/core/dev.c b/net/core/dev.c index 0094562b732a..683d493aa1bf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2563,7 +2563,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t netif_skb_features(struct sk_buff *skb) { - const struct net_device *dev = skb->dev; + struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; __be16 protocol = skb->protocol; @@ -2571,13 +2571,20 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; + /* If encapsulation offload request, verify we are testing + * hardware encapsulation features instead of standard + * features for the netdev + */ + if (skb->encapsulation) + features &= dev->hw_enc_features; + if (!vlan_tx_tag_present(skb)) { if (unlikely(protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else { - return harmonize_features(skb, features); + goto finalize; } } @@ -2595,6 +2602,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); +finalize: + if (dev->netdev_ops->ndo_features_check) + features &= dev->netdev_ops->ndo_features_check(skb, dev, + features); + return harmonize_features(skb, features); } EXPORT_SYMBOL(netif_skb_features); @@ -2665,13 +2677,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (unlikely(!skb)) goto out_null; - /* If encapsulation offload request, verify we are testing - * hardware encapsulation features instead of standard - * features for the netdev - */ - if (skb->encapsulation) - features &= dev->hw_enc_features; - if (netif_needs_gso(dev, skb, features)) { struct sk_buff *segs; -- cgit v1.2.3