From a09a4c8dd1ec7f830e1fb9e59eb72bddc965d168 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sat, 19 Mar 2016 09:32:02 -0700 Subject: tunnels: Remove encapsulation offloads on decap. If a packet is either locally encapsulated or processed through GRO it is marked with the offloads that it requires. However, when it is decapsulated these tunnel offload indications are not removed. This means that if we receive an encapsulated TCP packet, aggregate it with GRO, decapsulate, and retransmit the resulting frame on a NIC that does not support encapsulation, we won't be able to take advantage of hardware offloads even though it is just a simple TCP packet at this point. This fixes the problem by stripping off encapsulation offload indications when packets are decapsulated. The performance impacts of this bug are significant. In a test where a Geneve encapsulated TCP stream is sent to a hypervisor, GRO'ed, decapsulated, and bridged to a VM performance is improved by 60% (5Gbps->8Gbps) as a result of avoiding unnecessary segmentation at the VM tap interface. Reported-by: Ramu Ramamurthy Fixes: 68c33163 ("v4 GRE: Add TCP segmentation offload for GRE") Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/fou.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net/ipv4/fou.c') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 780484243e14..a0586b4a197d 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk) return sk->sk_user_data; } -static void fou_recv_pull(struct sk_buff *skb, size_t len) +static int fou_recv_pull(struct sk_buff *skb, size_t len) { struct iphdr *iph = ip_hdr(skb); @@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len) __skb_pull(skb, len); skb_postpull_rcsum(skb, udp_hdr(skb), len); skb_reset_transport_header(skb); + return iptunnel_pull_offloads(skb); } static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) @@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) if (!fou) return 1; - fou_recv_pull(skb, sizeof(struct udphdr)); + if (fou_recv_pull(skb, sizeof(struct udphdr))) + goto drop; return -fou->protocol; + +drop: + kfree_skb(skb); + return 0; } static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, @@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); + if (iptunnel_pull_offloads(skb)) + goto drop; + return -guehdr->proto_ctype; drop: -- cgit v1.2.3 From c3483384ee511ee2af40b4076366cd82a6a47b86 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 29 Mar 2016 14:55:22 -0700 Subject: gro: Allow tunnel stacking in the case of FOU/GUE This patch should fix the issues seen with a recent fix to prevent tunnel-in-tunnel frames from being generated with GRO. The fix itself is correct for now as long as we do not add any devices that support NETIF_F_GSO_GRE_CSUM. When such a device is added it could have the potential to mess things up due to the fact that the outer transport header points to the outer UDP header and not the GRE header as would be expected. Fixes: fac8e0f579695 ("tunnels: Don't apply GRO to multiple layers of encapsulation.") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fou.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net/ipv4/fou.c') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index a0586b4a197d..5a94aea280d3 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -195,6 +195,14 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, u8 proto = NAPI_GRO_CB(skb)->proto; const struct net_offload **offloads; + /* We can clear the encap_mark for FOU as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -352,6 +360,14 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, } } + /* We can clear the encap_mark for GUE as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); -- cgit v1.2.3 From d92283e338f6d6503b7417536bf3478f466cbc01 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 5 Apr 2016 08:22:54 -0700 Subject: fou: change to use UDP socket GRO Adapt gue_gro_receive, gue_gro_complete to take a socket argument. Don't set udp_offloads any more. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/fou.c | 48 +++++++++++++++++------------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) (limited to 'net/ipv4/fou.c') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 5a94aea280d3..5738b9771067 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -22,7 +22,6 @@ struct fou { u8 flags; __be16 port; u16 type; - struct udp_offload udp_offloads; struct list_head list; struct rcu_head rcu; }; @@ -186,13 +185,13 @@ drop: return 0; } -static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff) +static struct sk_buff **fou_gro_receive(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) { const struct net_offload *ops; struct sk_buff **pp = NULL; - u8 proto = NAPI_GRO_CB(skb)->proto; + u8 proto = fou_from_sock(sk)->protocol; const struct net_offload **offloads; /* We can clear the encap_mark for FOU as we are essentially doing @@ -217,11 +216,11 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff, - struct udp_offload *uoff) +static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, + int nhoff) { const struct net_offload *ops; - u8 proto = NAPI_GRO_CB(skb)->proto; + u8 proto = fou_from_sock(sk)->protocol; int err = -ENOSYS; const struct net_offload **offloads; @@ -264,9 +263,9 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return guehdr; } -static struct sk_buff **gue_gro_receive(struct sk_buff **head, - struct sk_buff *skb, - struct udp_offload *uoff) +static struct sk_buff **gue_gro_receive(struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) { const struct net_offload **offloads; const struct net_offload *ops; @@ -277,7 +276,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, void *data; u16 doffset = 0; int flush = 1; - struct fou *fou = container_of(uoff, struct fou, udp_offloads); + struct fou *fou = fou_from_sock(sk); struct gro_remcsum grc; skb_gro_remcsum_init(&grc); @@ -386,8 +385,7 @@ out: return pp; } -static int gue_gro_complete(struct sk_buff *skb, int nhoff, - struct udp_offload *uoff) +static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { const struct net_offload **offloads; struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); @@ -435,10 +433,7 @@ static int fou_add_to_port_list(struct net *net, struct fou *fou) static void fou_release(struct fou *fou) { struct socket *sock = fou->sock; - struct sock *sk = sock->sk; - if (sk->sk_family == AF_INET) - udp_del_offload(&fou->udp_offloads); list_del(&fou->list); udp_tunnel_sock_release(sock); @@ -448,11 +443,9 @@ static void fou_release(struct fou *fou) static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) { udp_sk(sk)->encap_rcv = fou_udp_recv; - fou->protocol = cfg->protocol; - fou->udp_offloads.callbacks.gro_receive = fou_gro_receive; - fou->udp_offloads.callbacks.gro_complete = fou_gro_complete; - fou->udp_offloads.port = cfg->udp_config.local_udp_port; - fou->udp_offloads.ipproto = cfg->protocol; + udp_sk(sk)->gro_receive = fou_gro_receive; + udp_sk(sk)->gro_complete = fou_gro_complete; + fou_from_sock(sk)->protocol = cfg->protocol; return 0; } @@ -460,9 +453,8 @@ static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) { udp_sk(sk)->encap_rcv = gue_udp_recv; - fou->udp_offloads.callbacks.gro_receive = gue_gro_receive; - fou->udp_offloads.callbacks.gro_complete = gue_gro_complete; - fou->udp_offloads.port = cfg->udp_config.local_udp_port; + udp_sk(sk)->gro_receive = gue_gro_receive; + udp_sk(sk)->gro_complete = gue_gro_complete; return 0; } @@ -521,12 +513,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; - if (cfg->udp_config.family == AF_INET) { - err = udp_add_offload(net, &fou->udp_offloads); - if (err) - goto error; - } - err = fou_add_to_port_list(net, fou); if (err) goto error; -- cgit v1.2.3 From a0ca153f98db8cf25298565a09e11fe9d82846ad Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 5 Apr 2016 09:13:39 -0700 Subject: GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU This patch fixes an issue I found in which we were dropping frames if we had enabled checksums on GRE headers that were encapsulated by either FOU or GUE. Without this patch I was barely able to get 1 Gb/s of throughput. With this patch applied I am now at least getting around 6 Gb/s. The issue is due to the fact that with FOU or GUE applied we do not provide a transport offset pointing to the GRE header, nor do we offload it in software as the GRE header is completely skipped by GSO and treated like a VXLAN or GENEVE type header. As such we need to prevent the stack from generating it and also prevent GRE from generating it via any interface we create. Fixes: c3483384ee511 ("gro: Allow tunnel stacking in the case of FOU/GUE") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- net/core/dev.c | 1 + net/ipv4/fou.c | 6 ++++++ net/ipv4/gre_offload.c | 8 ++++++++ net/ipv4/ip_gre.c | 13 ++++++++++--- 5 files changed, 29 insertions(+), 4 deletions(-) (limited to 'net/ipv4/fou.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb0d5d09c2e4..8395308a2445 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2120,7 +2120,10 @@ struct napi_gro_cb { /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; - /* 7 bit hole */ + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* 6 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; diff --git a/net/core/dev.c b/net/core/dev.c index b9bcbe77d913..77a71cd68535 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4439,6 +4439,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 5a94aea280d3..a39068b4a4d9 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -203,6 +203,9 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, */ NAPI_GRO_CB(skb)->encap_mark = 0; + /* Flag this frame as already having an outer encap header */ + NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -368,6 +371,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, */ NAPI_GRO_CB(skb)->encap_mark = 0; + /* Flag this frame as already having an outer encap header */ + NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index c47539d04b88..6a5bd4317866 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -150,6 +150,14 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) goto out; + /* We can only support GRE_CSUM if we can track the location of + * the GRE header. In the case of FOU/GUE we cannot because the + * outer UDP header displaces the GRE header leaving us in a state + * of limbo. + */ + if ((greh->flags & GRE_CSUM) && NAPI_GRO_CB(skb)->is_fou) + goto out; + type = greh->protocol; rcu_read_lock(); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 31936d387cfd..af5d1f38217f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -862,9 +862,16 @@ static void __gre_tunnel_init(struct net_device *dev) dev->hw_features |= GRE_FEATURES; if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported. */ - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. + */ + if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || + (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + /* Can use a lockless transmit, unless we generate * output sequences */ -- cgit v1.2.3