From 4ed2d765dfaccff5ebdac68e2064b59125033a3b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 4 Aug 2014 22:11:49 -0400 Subject: net-timestamp: TCP timestamping TCP timestamping extends SO_TIMESTAMPING to bytestreams. Bytestreams do not have a 1:1 relationship between send() buffers and network packets. The feature interprets a send call on a bytestream as a request for a timestamp for the last byte in that send() buffer. The choice corresponds to a request for a timestamp when all bytes in the buffer have been sent. That assumption depends on in-order kernel transmission. This is the common case. That said, it is possible to construct a traffic shaping tree that would result in reordering. The guarantee is strong, then, but not ironclad. This implementation supports send and sendpages (splice). GSO replaces one large packet with multiple smaller packets. This patch also copies the option into the correct smaller packet. This patch does not yet support timestamping on data in an initial TCP Fast Open SYN, because that takes a very different data path. If ID generation in ee_data is enabled, bytestream timestamps return a byte offset, instead of the packet counter for datagrams. The implementation supports a single timestamp per packet. It silenty replaces requests for previous timestamps. To avoid missing tstamps, flush the tcp queue by disabling Nagle, cork and autocork. Missing tstamps can be detected by offset when the ee_data ID is enabled. Implementation details: - On GSO, the timestamping code can be included in the main loop. I moved it into its own loop to reduce the impact on the common case to a single branch. - To avoid leaking the absolute seqno to userspace, the offset returned in ee_data must always be relative. It is an offset between an skb and sk field. The first is always set (also for GSO & ACK). The second must also never be uninitialized. Only allow the ID option on sockets in the ESTABLISHED state, for which the seqno is available. Never reset it to zero (instead, move it to the current seqno when reenabling the option). Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net/ipv4/tcp_offload.c') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 55046ecd083e..f597119fc4e7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -14,6 +14,21 @@ #include #include +void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, unsigned int seq, + unsigned int mss) +{ + while (skb) { + if (ts_seq < (__u64) seq + mss) { + skb_shinfo(skb)->tx_flags = SKBTX_SW_TSTAMP; + skb_shinfo(skb)->tskey = ts_seq; + return; + } + + skb = skb->next; + seq += mss; + } +} + struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -91,6 +106,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); + if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) + tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + (__force u32)delta)); -- cgit v1.2.3 From f066e2b091a50f0b76ade87250065d65996b93dd Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 6 Aug 2014 15:09:44 -0400 Subject: net-timestamp: cumulative tcp timestamping fixes A set of small fixes pointed out just after the merge: - make tcp_tx_timestamp static - make tcp_gso_tstamp static - use before() to compare TCP seqno, instead of cast to u64 - add tstamp to tx_flags in GSO, instead of overwrite tx_flags - record skb_shinfo(skb)->tskey for all timestamps, also HW. - optimization in tcp_tx_timestamp: call sock_tx_timestamp only if a tstamp option is set. Signed-off-by: Willem de Bruijn Fixes: 4ed2d765dfac ("net-timestamp: TCP timestamping") Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 12 +++++++----- net/ipv4/tcp_offload.c | 8 ++++---- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_offload.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 744af67a5989..181b70ebd964 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -426,13 +426,15 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) +static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) { - struct skb_shared_info *shinfo = skb_shinfo(skb); + if (sk->sk_tsflags) { + struct skb_shared_info *shinfo = skb_shinfo(skb); - sock_tx_timestamp(sk, &shinfo->tx_flags); - if (shinfo->tx_flags & SKBTX_ANY_SW_TSTAMP) - shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; + sock_tx_timestamp(sk, &shinfo->tx_flags); + if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) + shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; + } } /* diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index f597119fc4e7..bc1b83cb8309 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -14,12 +14,12 @@ #include #include -void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, unsigned int seq, - unsigned int mss) +static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, + unsigned int seq, unsigned int mss) { while (skb) { - if (ts_seq < (__u64) seq + mss) { - skb_shinfo(skb)->tx_flags = SKBTX_SW_TSTAMP; + if (before(ts_seq, seq + mss)) { + skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP; skb_shinfo(skb)->tskey = ts_seq; return; } -- cgit v1.2.3