From fda55eca5a33f33ffcd4192c6b2d75179714a52c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jan 2013 09:28:21 +0000 Subject: net: introduce skb_transport_header_was_set() We have skb_mac_header_was_set() helper to tell if mac_header was set on a skb. We would like the same for transport_header. __netif_receive_skb() doesn't reset the transport header if already set by GRO layer. Note that network stacks usually reset the transport header anyway, after pulling the network header, so this change only allows a followup patch to have more precise qdisc pkt_len computation for GSO packets at ingress side. Signed-off-by: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 320e976d5ab8..8b2256e880e0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1492,6 +1492,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_transport_header_was_set(const struct sk_buff *skb) +{ + return skb->transport_header != ~0U; +} + static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { return skb->head + skb->transport_header; @@ -1580,6 +1585,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header = skb->data + offset; } +static inline bool skb_transport_header_was_set(const struct sk_buff *skb) +{ + return skb->transport_header != NULL; +} + static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { return skb->transport_header; -- cgit v1.2.3 From cef401de7be8c4e155c6746bfccf721a4fa5fab9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Jan 2013 20:34:37 +0000 Subject: net: fix possible wrong checksum generation Pravin Shelar mentioned that GSO could potentially generate wrong TX checksum if skb has fragments that are overwritten by the user between the checksum computation and transmit. He suggested to linearize skbs but this extra copy can be avoided for normal tcp skbs cooked by tcp_sendmsg(). This patch introduces a new SKB_GSO_SHARED_FRAG flag, set in skb_shinfo(skb)->gso_type if at least one frag can be modified by the user. Typical sources of such possible overwrites are {vm}splice(), sendfile(), and macvtap/tun/virtio_net drivers. Tested: $ netperf -H 7.7.8.84 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 3959.52 $ netperf -H 7.7.8.84 -t TCP_SENDFILE TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 3216.80 Performance of the SENDFILE is impacted by the extra allocation and copy, and because we use order-0 pages, while the TCP_STREAM uses bigger pages. Reported-by: Pravin Shelar Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 3 ++- drivers/net/tun.c | 12 ++++++++---- drivers/net/virtio_net.c | 12 ++++++++---- include/linux/skbuff.h | 19 +++++++++++++++++++ net/core/dev.c | 9 +++++++++ net/core/skbuff.c | 4 ++++ net/ipv4/af_inet.c | 1 + net/ipv4/ip_gre.c | 4 +++- net/ipv4/ipip.c | 4 +++- net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- net/ipv6/ip6_offload.c | 1 + 13 files changed, 65 insertions(+), 15 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0f0f9ce3a776..b181dfb3d6d6 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -543,6 +543,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, skb->data_len += len; skb->len += len; skb->truesize += truesize; + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; @@ -598,7 +599,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; - skb_shinfo(skb)->gso_type = gso_type; + skb_shinfo(skb)->gso_type |= gso_type; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index c81680dc10eb..293ce8dfc9e6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1005,6 +1005,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, skb->data_len += len; skb->len += len; skb->truesize += truesize; + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; @@ -1150,16 +1151,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { + unsigned short gso_type = 0; + pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_TCPV6: - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + gso_type = SKB_GSO_TCPV6; break; case VIRTIO_NET_HDR_GSO_UDP: - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + gso_type = SKB_GSO_UDP; break; default: tun->dev->stats.rx_frame_errors++; @@ -1168,9 +1171,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + gso_type |= SKB_GSO_TCP_ECN; skb_shinfo(skb)->gso_size = gso.gso_size; + skb_shinfo(skb)->gso_type |= gso_type; if (skb_shinfo(skb)->gso_size == 0) { tun->dev->stats.rx_frame_errors++; kfree_skb(skb); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 701408a1ded6..58914c8ea68f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -220,6 +220,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page, skb->len += size; skb->truesize += PAGE_SIZE; skb_shinfo(skb)->nr_frags++; + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; *len -= size; } @@ -379,16 +380,18 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) ntohs(skb->protocol), skb->len, skb->pkt_type); if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { + unsigned short gso_type = 0; + pr_debug("GSO!\n"); switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_UDP: - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + gso_type = SKB_GSO_UDP; break; case VIRTIO_NET_HDR_GSO_TCPV6: - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + gso_type = SKB_GSO_TCPV6; break; default: net_warn_ratelimited("%s: bad gso type %u.\n", @@ -397,7 +400,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) } if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + gso_type |= SKB_GSO_TCP_ECN; skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; if (skb_shinfo(skb)->gso_size == 0) { @@ -405,6 +408,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) goto frame_err; } + skb_shinfo(skb)->gso_type |= gso_type; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; skb_shinfo(skb)->gso_segs = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8b2256e880e0..0259b719bebf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -307,6 +307,13 @@ enum { SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, + + /* This indicates at least one fragment might be overwritten + * (as in vmsplice(), sendfile() ...) + * If we need to compute a TX checksum, we'll need to copy + * all frags to avoid possible bad checksum + */ + SKB_GSO_SHARED_FRAG = 1 << 6, }; #if BITS_PER_LONG > 32 @@ -2200,6 +2207,18 @@ static inline int skb_linearize(struct sk_buff *skb) return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; } +/** + * skb_has_shared_frag - can any frag be overwritten + * @skb: buffer to test + * + * Return true if the skb has at least one frag that might be modified + * by an external entity (as in vmsplice()/sendfile()) + */ +static inline bool skb_has_shared_frag(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG; +} + /** * skb_linearize_cow - make sure skb is linear and writable * @skb: buffer to process diff --git a/net/core/dev.c b/net/core/dev.c index c69cd8721b28..a83375d3af72 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2271,6 +2271,15 @@ int skb_checksum_help(struct sk_buff *skb) return -EINVAL; } + /* Before computing a checksum, we should make sure no frag could + * be modified by an external entity : checksum could be wrong. + */ + if (skb_has_shared_frag(skb)) { + ret = __skb_linearize(skb); + if (ret) + goto out; + } + offset = skb_checksum_start_offset(skb); BUG_ON(offset >= skb_headlen(skb)); csum = skb_checksum(skb, offset, skb->len - offset, 0); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2568c449fe36..bddc1dd2e7f2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2340,6 +2340,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); + skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type; + if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2845,6 +2847,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); + skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4b7053919976..49ddca31c4da 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1306,6 +1306,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_SHARED_FRAG | 0))) goto out; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 303012adf9e6..af6be70821c4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -738,7 +738,7 @@ drop: static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; const struct iphdr *tiph; struct flowi4 fl4; u8 tos; @@ -756,6 +756,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev skb_checksum_help(skb)) goto tx_error; + old_iph = ip_hdr(skb); + if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 191fc24a745a..8f024d41eefa 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; @@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb_checksum_help(skb)) goto tx_error; + old_iph = ip_hdr(skb); + if (tos & 1) tos = old_iph->tos; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 52271947a471..3ec1f69c5ceb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -896,6 +896,8 @@ new_segment: skb_fill_page_desc(skb, i, page, offset, copy); } + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; + skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -3032,6 +3034,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_SHARED_FRAG | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0905997e5873..492c7cfe1453 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1240,13 +1240,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ if (!skb_shinfo(prev)->gso_size) { skb_shinfo(prev)->gso_size = mss; - skb_shinfo(prev)->gso_type = sk->sk_gso_type; + skb_shinfo(prev)->gso_type |= sk->sk_gso_type; } /* CHECKME: To clear or not to clear? Mimics normal skb currently */ if (skb_shinfo(skb)->gso_segs <= 1) { skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; } /* Difference in this won't matter, both ACKed by the same cumul. ACK */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 667a6adfccf8..367e2ec01da1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1133,6 +1133,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1140,11 +1141,10 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, */ skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; } else { skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type = sk->sk_gso_type; + skb_shinfo(skb)->gso_type |= sk->sk_gso_type; } } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f095..d141fc32a2ea 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_SHARED_FRAG | 0))) goto out; -- cgit v1.2.3 From e5e67305885eb12849b5475764b0542f03dc2b59 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 8 Feb 2013 10:17:15 +0000 Subject: skbuff: Move definition of NETDEV_FRAG_PAGE_MAX_SIZE In order to address the fact that some devices cannot support the full 32K frag size we need to have the value accessible somewhere so that we can use it to do comparisons against what the device can support. As such I am moving the values out of skbuff.c and into skbuff.h. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ net/core/skbuff.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0259b719bebf..d7573c37a51d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1832,6 +1832,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) +#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) +#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE + extern void *netdev_alloc_frag(unsigned int fragsz); extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 55f7ef6ada6d..6114c1143564 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -351,10 +351,6 @@ struct netdev_alloc_cache { }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; -- cgit v1.2.3 From c9af6db4c11ccc6c3e7f19bbc15d54023956f97c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 11 Feb 2013 09:27:41 +0000 Subject: net: Fix possible wrong checksum generation. Patch cef401de7be8c4e (net: fix possible wrong checksum generation) fixed wrong checksum calculation but it broke TSO by defining new GSO type but not a netdev feature for that type. net_gso_ok() would not allow hardware checksum/segmentation offload of such packets without the feature. Following patch fixes TSO and wrong checksum. This patch uses same logic that Eric Dumazet used. Patch introduces new flag SKBTX_SHARED_FRAG if at least one frag can be modified by the user. but SKBTX_SHARED_FRAG flag is kept in skb shared info tx_flags rather than gso_type. tx_flags is better compared to gso_type since we can have skb with shared frag without gso packet. It does not link SHARED_FRAG to GSO, So there is no need to define netdev feature for this. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 4 ++-- drivers/net/tun.c | 13 +++++-------- drivers/net/virtio_net.c | 13 +++++-------- include/linux/skbuff.h | 17 +++++++++-------- net/core/skbuff.c | 5 ++--- net/ipv4/af_inet.c | 1 - net/ipv4/ip_output.c | 1 + net/ipv4/tcp.c | 4 +--- net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- net/ipv6/ip6_offload.c | 1 - 11 files changed, 29 insertions(+), 38 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index b181dfb3d6d6..97243011d319 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -543,7 +543,6 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, skb->data_len += len; skb->len += len; skb->truesize += truesize; - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; @@ -599,7 +598,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { skb_shinfo(skb)->gso_size = vnet_hdr->gso_size; - skb_shinfo(skb)->gso_type |= gso_type; + skb_shinfo(skb)->gso_type = gso_type; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; @@ -743,6 +742,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (zerocopy) { skb_shinfo(skb)->destructor_arg = m->msg_control; skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } if (vlan) macvlan_start_xmit(skb, vlan->dev); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b1038c0e2240..b6f45c5d84d5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1019,7 +1019,6 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, skb->data_len += len; skb->len += len; skb->truesize += truesize; - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; atomic_add(truesize, &skb->sk->sk_wmem_alloc); while (len) { int off = base & ~PAGE_MASK; @@ -1165,18 +1164,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { - unsigned short gso_type = 0; - pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: - gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_TCPV6: - gso_type = SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; default: tun->dev->stats.rx_frame_errors++; @@ -1185,10 +1182,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN) - gso_type |= SKB_GSO_TCP_ECN; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; skb_shinfo(skb)->gso_size = gso.gso_size; - skb_shinfo(skb)->gso_type |= gso_type; if (skb_shinfo(skb)->gso_size == 0) { tun->dev->stats.rx_frame_errors++; kfree_skb(skb); @@ -1204,6 +1200,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (zerocopy) { skb_shinfo(skb)->destructor_arg = msg_control; skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } skb_reset_network_header(skb); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 381a2d8d8a81..192c91c8e799 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -227,7 +227,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page, skb->len += size; skb->truesize += PAGE_SIZE; skb_shinfo(skb)->nr_frags++; - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; *len -= size; } @@ -387,18 +387,16 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) ntohs(skb->protocol), skb->len, skb->pkt_type); if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { - unsigned short gso_type = 0; - pr_debug("GSO!\n"); switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: - gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; case VIRTIO_NET_HDR_GSO_TCPV6: - gso_type = SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; default: net_warn_ratelimited("%s: bad gso type %u.\n", @@ -407,7 +405,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) } if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) - gso_type |= SKB_GSO_TCP_ECN; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; if (skb_shinfo(skb)->gso_size == 0) { @@ -415,7 +413,6 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) goto frame_err; } - skb_shinfo(skb)->gso_type |= gso_type; /* Header must be checked, and gso_segs computed. */ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; skb_shinfo(skb)->gso_segs = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d7573c37a51d..9da99520ccd5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -230,6 +230,13 @@ enum { /* generate wifi status information (where possible) */ SKBTX_WIFI_STATUS = 1 << 4, + + /* This indicates at least one fragment might be overwritten + * (as in vmsplice(), sendfile() ...) + * If we need to compute a TX checksum, we'll need to copy + * all frags to avoid possible bad checksum + */ + SKBTX_SHARED_FRAG = 1 << 5, }; /* @@ -307,13 +314,6 @@ enum { SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, - - /* This indicates at least one fragment might be overwritten - * (as in vmsplice(), sendfile() ...) - * If we need to compute a TX checksum, we'll need to copy - * all frags to avoid possible bad checksum - */ - SKB_GSO_SHARED_FRAG = 1 << 6, }; #if BITS_PER_LONG > 32 @@ -2220,7 +2220,8 @@ static inline int skb_linearize(struct sk_buff *skb) */ static inline bool skb_has_shared_frag(const struct sk_buff *skb) { - return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG; + return skb_is_nonlinear(skb) && + skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; } /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 21a22cce6e53..6c1ad09f8796 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2326,8 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type; - + skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2833,7 +2832,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; + skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1aec92bf8018..e6e5d8506336 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1287,7 +1287,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | - SKB_GSO_SHARED_FRAG | 0))) goto out; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3e98ed2bff55..5e12dca7b3dd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -598,6 +598,7 @@ slow_path: /* for offloaded checksums cleanup checksum before fragmentation */ if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) goto fail; + iph = ip_hdr(skb); left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 801b07b796f0..1f0bedb8622f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -897,8 +897,7 @@ new_segment: get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } - - skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; skb->len += copy; skb->data_len += copy; @@ -3044,7 +3043,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | - SKB_GSO_SHARED_FRAG | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d9bfaea34322..a759e19496d2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1239,13 +1239,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ if (!skb_shinfo(prev)->gso_size) { skb_shinfo(prev)->gso_size = mss; - skb_shinfo(prev)->gso_type |= sk->sk_gso_type; + skb_shinfo(prev)->gso_type = sk->sk_gso_type; } /* CHECKME: To clear or not to clear? Mimics normal skb currently */ if (skb_shinfo(skb)->gso_segs <= 1) { skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; + skb_shinfo(skb)->gso_type = 0; } /* Difference in this won't matter, both ACKed by the same cumul. ACK */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 564bf89d9fd3..6182d90e97b0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1133,7 +1133,6 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1141,10 +1140,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, */ skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; } else { skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type |= sk->sk_gso_type; + skb_shinfo(skb)->gso_type = sk->sk_gso_type; } } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d141fc32a2ea..f26f0da7f095 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -100,7 +100,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | - SKB_GSO_SHARED_FRAG | 0))) goto out; -- cgit v1.2.3 From 14bbd6a565e1bcdc240d44687edb93f721cfdf99 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 09:44:49 +0000 Subject: net: Add skb_unclone() helper function. This function will be used in next GRE_GSO patch. This patch does not change any functionality. Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet --- drivers/net/ppp/ppp_generic.c | 3 +-- include/linux/skbuff.h | 10 ++++++++++ net/ipv4/ah4.c | 3 +-- net/ipv4/ip_fragment.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_mode_tunnel.c | 3 +-- net/ipv6/ah6.c | 3 +-- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/xfrm6_mode_tunnel.c | 3 +-- net/sched/act_ipt.c | 6 ++---- net/sched/act_pedit.c | 3 +-- 13 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0b2706abe3e3..4fd754e74eb2 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1805,8 +1805,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) /* the filter instructions are constructed assuming a four-byte PPP header on each packet */ if (ppp->pass_filter || ppp->active_filter) { - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto err; *skb_push(skb, 2) = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9da99520ccd5..ca6ee7d93edb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -804,6 +804,16 @@ static inline int skb_cloned(const struct sk_buff *skb) (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; } +static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(pri & __GFP_WAIT); + + if (skb_cloned(skb)) + return pskb_expand_head(skb, 0, 0, pri); + + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a69b4e4a02b5..2e7f1948216f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -321,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1211613c6c34..b6d30acb600c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -590,7 +590,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; /* If the first fragment is fragmented itself, we split diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6182d90e97b0..fd0cea114b5d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1331,7 +1331,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; __pskb_trim_head(skb, len); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216dc..1f12c8b45864 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * header and optional ESP marker bytes) and then modify the * protocol to ESP, and then call into the transform receiver. */ - if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto drop; /* Now we can update and verify the packet length... */ diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ddee0a099a2c..1162ace30838 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -142,8 +142,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) for_each_input_rcu(rcv_notify_handlers, handler) handler->handler(skb); - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + if (err = skb_unclone(skb, GFP_ATOMIC)) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 384233188ac1..bb02e176cb70 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c674f158efa8..b89a8c3186cd 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -368,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + if (skb_unclone(head, GFP_ATOMIC)) { pr_debug("skb is cloned but can't expand head"); goto out_oom; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index bab2c270f292..e354743ed426 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -404,7 +404,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9f2095b19ad0..93c41a81c4c3 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -69,8 +69,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - if (skb_cloned(skb) && - (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + if (err = skb_unclone(skb, GFP_ATOMIC)) goto out; if (x->props.flags & XFRM_STATE_DECAP_DSCP) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0fb9e3f567e6..e0f6de64afec 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_ipt *ipt = a->priv; struct xt_action_param par; - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - } + if (skb_unclone(skb, GFP_ATOMIC)) + return TC_ACT_UNSPEC; spin_lock(&ipt->tcf_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 0c3faddf3f2c..7ed78c9e505c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -131,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, int i, munged = 0; unsigned int off; - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; off = skb_network_offset(skb); -- cgit v1.2.3 From 68c331631143f5f039baac99a650e0b9e1ea02b6 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 14 Feb 2013 14:02:41 +0000 Subject: v4 GRE: Add TCP segmentation offload for GRE Following patch adds GRE protocol offload handler so that skb_gso_segment() can segment GRE packets. SKB GSO CB is added to keep track of total header length so that skb_segment can push entire header. e.g. in case of GRE, skb_segment need to push inner and outer headers to every segment. New NETIF_F_GRE_GSO feature is added for devices which support HW GRE TSO offload. Currently none of devices support it therefore GRE GSO always fall backs to software GSO. [ Compute pkt_len before ip_local_out() invocation. -DaveM ] Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 3 +- include/linux/skbuff.h | 17 ++++++ net/core/dev.c | 1 + net/core/ethtool.c | 1 + net/core/skbuff.c | 6 +- net/ipv4/af_inet.c | 1 + net/ipv4/gre.c | 118 ++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 82 +++++++++++++++++++++++++--- net/ipv4/tcp.c | 1 + net/ipv4/udp.c | 3 +- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 3 +- 12 files changed, 226 insertions(+), 11 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 5ac32123035a..3dd39340430e 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -41,7 +41,7 @@ enum { NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ NETIF_F_FSO_BIT, /* ... FCoE segmentation */ - NETIF_F_GSO_RESERVED1, /* ... free (fill GSO_MASK to 8 bits) */ + NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ = NETIF_F_GSO_LAST, @@ -102,6 +102,7 @@ enum { #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) +#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ca6ee7d93edb..821c7f45d2a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -314,6 +314,8 @@ enum { SKB_GSO_TCPV6 = 1 << 4, SKB_GSO_FCOE = 1 << 5, + + SKB_GSO_GRE = 1 << 6, }; #if BITS_PER_LONG > 32 @@ -2732,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb) } #endif +/* Keeps track of mac header offset relative to skb->head. + * It is useful for TSO of Tunneling protocol. e.g. GRE. + * For non-tunnel skb it points to skb_mac_header() and for + * tunnel skb it points to outer mac header. */ +struct skb_gso_cb { + int mac_offset; +}; +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) + +static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) +{ + return (skb_mac_header(inner_skb) - inner_skb->head) - + SKB_GSO_CB(inner_skb)->mac_offset; +} + static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/net/core/dev.c b/net/core/dev.c index 67deae60214c..1cd6297fd34b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2413,6 +2413,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, return ERR_PTR(err); } + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); skb_reset_mac_header(skb); skb_reset_mac_len(skb); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d9d55209db67..3e9b2c3e30f0 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", + [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6c1ad09f8796..2a3ca33c30aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; + unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; int sg = !!(features & NETIF_F_SG); @@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + skb_network_header_len(skb)); - skb_copy_from_linear_data(skb, nskb->data, doffset); + + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) continue; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e6e5d8506336..e225a4e5b572 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | 0))) goto out; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 42a491055c76..7a4c710c4cdd 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,11 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); +struct gre_base_hdr { + __be16 flags; + __be16 protocol; +}; +#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { @@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info) rcu_read_unlock(); } +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + if (greh->protocol == htons(ETH_P_TEB)) { + struct ethhdr *eth = eth_hdr(skb); + skb->protocol = eth->h_proto; + } else { + skb->protocol = greh->protocol; + } + + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + } while ((skb = skb->next)); +out: + return segs; +} + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, .err_handler = gre_err, .netns_ok = 1, }; +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + static int __init gre_init(void) { pr_info("GRE over IPv4 demultiplexor driver\n"); @@ -127,11 +238,18 @@ static int __init gre_init(void) return -EAGAIN; } + if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { + pr_err("can't add protocol offload\n"); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); + return -EAGAIN; + } + return 0; } static void __exit gre_exit(void) { + inet_del_offload(&gre_offload, IPPROTO_GRE); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 00a14b9864ea..a56f1182c176 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -735,8 +735,33 @@ drop: return 0; } +static struct sk_buff *handle_offloads(struct sk_buff *skb) +{ + int err; + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } + skb->ip_summed = CHECKSUM_NONE; + + return skb; + +error: + kfree_skb(skb); + return ERR_PTR(err); +} + static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { + struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *old_iph; const struct iphdr *tiph; @@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __be32 dst; int mtu; u8 ttl; + int err; + int pkt_len; - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) - goto tx_error; + skb = handle_offloads(skb); + if (IS_ERR(skb)) { + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (!skb->encapsulation) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } old_iph = ip_hdr(skb); @@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); - if ((old_iph->frag_off&htons(IP_DF)) && + if (!skb_is_gso(skb) && + (old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { + if (!skb_is_gso(skb) && + mtu >= IPV6_MIN_MTU && + mtu < skb->len - tunnel->hlen + gre_hlen) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; + iph->id = 0; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev *ptr = tunnel->parms.o_key; ptr--; } - if (tunnel->parms.o_flags&GRE_CSUM) { + /* Skip GRE checksum if skb is getting offloaded. */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && + (tunnel->parms.o_flags&GRE_CSUM)) { int offset = skb_transport_offset(skb); + if (skb_has_shared_frag(skb)) { + err = __skb_linearize(skb); + if (err) { + ip_rt_put(rt); + goto tx_error; + } + } + *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, skb->len - offset, @@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } } - iptunnel_xmit(skb, dev); + nf_reset(skb); + + pkt_len = skb->len - skb_transport_offset(skb); + err = ip_local_out(skb); + if (likely(net_xmit_eval(err) == 0)) { + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_IPV6) @@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) mtu = 68; tunnel->hlen = addend; + /* TCP offload with GRE SEQ is not supported. */ + if (!(tunnel->parms.o_flags & GRE_SEQ)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } return mtu; } @@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev) dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; + + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; } static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1f0bedb8622f..7a5ba48c2cc9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_GRE | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6791aac06ea9..39a5e7a9a77f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index f26f0da7f095..8234c1dcdf72 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_GRE | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0c8934a317c2..cf05cf073c51 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; -- cgit v1.2.3