Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 135
1 file changed, 91 insertions, 44 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfe128b81a01..660eac4bf2a7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1530,7 +1530,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
-	int nsize, old_factor;
+	int old_factor;
 	long limit;
 	int nlen;
 	u8 flags;
@@ -1538,9 +1538,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	if (WARN_ON(len > skb->len))
 		return -EINVAL;
 
-	nsize = skb_headlen(skb) - len;
-	if (nsize < 0)
-		nsize = 0;
+	DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
 
 	/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
 	 * We need some allowance to not penalize applications setting small
@@ -1560,7 +1558,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = tcp_stream_alloc_skb(sk, nsize, gfp, true);
+	buff = tcp_stream_alloc_skb(sk, gfp, true);
 	if (!buff)
 		return -ENOMEM; /* We'll just try again later. */
 	skb_copy_decrypted(buff, skb);
@@ -1568,7 +1566,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 
 	sk_wmem_queued_add(sk, buff->truesize);
 	sk_mem_charge(sk, buff->truesize);
-	nlen = skb->len - len - nsize;
+	nlen = skb->len - len;
 	buff->truesize += nlen;
 	skb->truesize -= nlen;
 
@@ -1626,13 +1624,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
 	struct skb_shared_info *shinfo;
 	int i, k, eat;
 
-	eat = min_t(int, len, skb_headlen(skb));
-	if (eat) {
-		__skb_pull(skb, eat);
-		len -= eat;
-		if (!len)
-			return 0;
-	}
+	DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
 	eat = len;
 	k = 0;
 	shinfo = skb_shinfo(skb);
@@ -1671,12 +1663,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 
 	TCP_SKB_CB(skb)->seq += len;
 
-	if (delta_truesize) {
-		skb->truesize -= delta_truesize;
-		sk_wmem_queued_add(sk, -delta_truesize);
-		if (!skb_zcopy_pure(skb))
-			sk_mem_uncharge(sk, delta_truesize);
-	}
+	skb->truesize -= delta_truesize;
+	sk_wmem_queued_add(sk, -delta_truesize);
+	if (!skb_zcopy_pure(skb))
+		sk_mem_uncharge(sk, delta_truesize);
 
 	/* Any change of skb->len requires recalculation of tso factor. */
 	if (tcp_skb_pcount(skb) > 1)
@@ -2126,11 +2116,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	u8 flags;
 
 	/* All of a TSO frame must be composed of paged data. */
-	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
-				    skb, len, mss_now, gfp);
+	DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len);
 
-	buff = tcp_stream_alloc_skb(sk, 0, gfp, true);
+	buff = tcp_stream_alloc_skb(sk, gfp, true);
 	if (unlikely(!buff))
 		return -ENOMEM;
 	skb_copy_decrypted(buff, skb);
@@ -2319,6 +2307,57 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
 	return true;
 }
 
+static int tcp_clone_payload(struct sock *sk, struct sk_buff *to,
+			     int probe_size)
+{
+	skb_frag_t *lastfrag = NULL, *fragto = skb_shinfo(to)->frags;
+	int i, todo, len = 0, nr_frags = 0;
+	const struct sk_buff *skb;
+
+	if (!sk_wmem_schedule(sk, to->truesize + probe_size))
+		return -ENOMEM;
+
+	skb_queue_walk(&sk->sk_write_queue, skb) {
+		const skb_frag_t *fragfrom = skb_shinfo(skb)->frags;
+
+		if (skb_headlen(skb))
+			return -EINVAL;
+
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, fragfrom++) {
+			if (len >= probe_size)
+				goto commit;
+			todo = min_t(int, skb_frag_size(fragfrom),
+				     probe_size - len);
+			len += todo;
+			if (lastfrag &&
+			    skb_frag_page(fragfrom) == skb_frag_page(lastfrag) &&
+			    skb_frag_off(fragfrom) == skb_frag_off(lastfrag) +
+						      skb_frag_size(lastfrag)) {
+				skb_frag_size_add(lastfrag, todo);
+				continue;
+			}
+			if (unlikely(nr_frags == MAX_SKB_FRAGS))
+				return -E2BIG;
+			skb_frag_page_copy(fragto, fragfrom);
+			skb_frag_off_copy(fragto, fragfrom);
+			skb_frag_size_set(fragto, todo);
+			nr_frags++;
+			lastfrag = fragto++;
+		}
+	}
+commit:
+	WARN_ON_ONCE(len != probe_size);
+	for (i = 0; i < nr_frags; i++)
+		skb_frag_ref(to, i);
+
+	skb_shinfo(to)->nr_frags = nr_frags;
+	to->truesize += probe_size;
+	to->len += probe_size;
+	to->data_len += probe_size;
+	__skb_header_release(to);
+	return 0;
+}
+
 /* Create a new MTU probe if we are ready.
  * MTU probe is regularly attempting to increase the path MTU by
  * deliberately sending larger packets. This discovers routing
@@ -2395,9 +2434,15 @@ static int tcp_mtu_probe(struct sock *sk)
 		return -1;
 
 	/* We're allowed to probe. Build it now. */
-	nskb = tcp_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
+	nskb = tcp_stream_alloc_skb(sk, GFP_ATOMIC, false);
 	if (!nskb)
 		return -1;
+
+	/* build the payload, and be prepared to abort if this fails. */
+	if (tcp_clone_payload(sk, nskb, probe_size)) {
+		consume_skb(nskb);
+		return -1;
+	}
 	sk_wmem_queued_add(sk, nskb->truesize);
 	sk_mem_charge(sk, nskb->truesize);
 
@@ -2415,7 +2460,6 @@ static int tcp_mtu_probe(struct sock *sk)
 	len = 0;
 	tcp_for_write_queue_from_safe(skb, next, sk) {
 		copy = min_t(int, skb->len, probe_size - len);
-		skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
 
 		if (skb->len <= copy) {
 			/* We've eaten all the data from this skb.
@@ -2431,12 +2475,8 @@ static int tcp_mtu_probe(struct sock *sk)
 		} else {
 			TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
 						   ~(TCPHDR_FIN|TCPHDR_PSH);
-			if (!skb_shinfo(skb)->nr_frags) {
-				skb_pull(skb, copy);
-			} else {
-				__pskb_trim_head(skb, copy);
-				tcp_set_skb_tso_segs(skb, mss_now);
-			}
+			__pskb_trim_head(skb, copy);
+			tcp_set_skb_tso_segs(skb, mss_now);
 			TCP_SKB_CB(skb)->seq += copy;
 		}
 
@@ -3746,8 +3786,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int space, err = 0;
+	struct page_frag *pfrag = sk_page_frag(sk);
 	struct sk_buff *syn_data;
+	int space, err = 0;
 
 	tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
 	if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
@@ -3766,25 +3807,31 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 
 	space = min_t(size_t, space, fo->size);
 
-	/* limit to order-0 allocations */
-	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
-
-	syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false);
+	if (space &&
+	    !skb_page_frag_refill(min_t(size_t, space, PAGE_SIZE),
+				  pfrag, sk->sk_allocation))
+		goto fallback;
+	syn_data = tcp_stream_alloc_skb(sk, sk->sk_allocation, false);
 	if (!syn_data)
 		goto fallback;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
 	if (space) {
-		int copied = copy_from_iter(skb_put(syn_data, space), space,
-					    &fo->data->msg_iter);
-		if (unlikely(!copied)) {
+		space = min_t(size_t, space, pfrag->size - pfrag->offset);
+		space = tcp_wmem_schedule(sk, space);
+	}
+	if (space) {
+		space = copy_page_from_iter(pfrag->page, pfrag->offset,
+					    space, &fo->data->msg_iter);
+		if (unlikely(!space)) {
 			tcp_skb_tsorted_anchor_cleanup(syn_data);
 			kfree_skb(syn_data);
 			goto fallback;
 		}
-		if (copied != space) {
-			skb_trim(syn_data, copied);
-			space = copied;
-		}
+		skb_fill_page_desc(syn_data, 0, pfrag->page,
+				   pfrag->offset, space);
+		page_ref_inc(pfrag->page);
+		pfrag->offset += space;
+		skb_len_add(syn_data, space);
 		skb_zcopy_set(syn_data, fo->uarg, NULL);
 	}
 	/* No more data pending in inet_wait_for_connect() */
@@ -3849,7 +3896,7 @@ int tcp_connect(struct sock *sk)
 		return 0;
 	}
 
-	buff = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
+	buff = tcp_stream_alloc_skb(sk, sk->sk_allocation, true);
 	if (unlikely(!buff))
 		return -ENOBUFS;
 
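The new tcp_clone_payload() is the heart of the change: instead of linearizing probe bytes with skb_copy_bits(), the MTU probe skb now takes references on the page fragments already queued for transmission, and a fragment is merged into the previous one whenever it sits on the same page and starts exactly where the previous one ends. The following is a minimal user-space sketch of that coalescing rule only; struct frag, page_id, clone_payload and MAX_FRAGS are invented stand-ins for skb_frag_t, struct page, the kernel helper and MAX_SKB_FRAGS, so this is an illustration, not kernel code.

/* Sketch of the fragment-coalescing rule used by tcp_clone_payload():
 * source fragments that are on the same page and byte-contiguous are
 * merged into a single destination fragment, up to probe_size bytes.
 */
#include <stdio.h>
#include <stddef.h>

#define MAX_FRAGS 4	/* stand-in for MAX_SKB_FRAGS */

struct frag {
	int page_id;	/* which page the bytes live on */
	size_t off;	/* offset within that page */
	size_t size;	/* number of bytes */
};

/* Returns the number of dst fragments used, or -1 if more than
 * MAX_FRAGS would be needed (the kernel returns -E2BIG there).
 */
static int clone_payload(const struct frag *src, int nsrc,
			 struct frag *dst, size_t probe_size)
{
	struct frag *last = NULL;
	size_t len = 0;
	int i, ndst = 0;

	for (i = 0; i < nsrc && len < probe_size; i++) {
		size_t todo = src[i].size;

		if (todo > probe_size - len)
			todo = probe_size - len;
		len += todo;

		/* Same page and byte-contiguous: grow the previous frag. */
		if (last && src[i].page_id == last->page_id &&
		    src[i].off == last->off + last->size) {
			last->size += todo;
			continue;
		}
		if (ndst == MAX_FRAGS)
			return -1;
		dst[ndst] = src[i];
		dst[ndst].size = todo;
		last = &dst[ndst++];
	}
	return ndst;
}

int main(void)
{
	/* Three source frags; the first two are contiguous on page 1. */
	const struct frag src[] = {
		{ .page_id = 1, .off = 0,    .size = 4096 },
		{ .page_id = 1, .off = 4096, .size = 4096 },
		{ .page_id = 2, .off = 0,    .size = 4096 },
	};
	struct frag dst[MAX_FRAGS];
	int i, n = clone_payload(src, 3, dst, 10000);

	/* Prints two frags: page 1 (8192 bytes) and page 2 (1808 bytes). */
	for (i = 0; i < n; i++)
		printf("frag %d: page %d off %zu size %zu\n",
		       i, dst[i].page_id, dst[i].off, dst[i].size);
	return 0;
}

In the kernel version the same walk additionally pre-charges the socket for the probe bytes (sk_wmem_schedule()) and takes a page reference for every fragment it keeps (skb_frag_ref()), so the probe skb can be freed independently of the write-queue skbs it borrowed its payload from.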