Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--  net/ipv4/tcp_ipv4.c  220
1 file changed, 101 insertions, 119 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f8527d41682..3c8d9b6f1ea4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -146,13 +146,15 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique);
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
+ struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
- struct rtable *rt;
+ __be16 orig_sport, orig_dport;
__be32 daddr, nexthop;
- int tmp;
+ struct flowi4 *fl4;
+ struct rtable *rt;
int err;
+ struct ip_options_rcu *inet_opt;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
@@ -161,20 +163,26 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -EAFNOSUPPORT;
nexthop = daddr = usin->sin_addr.s_addr;
- if (inet->opt && inet->opt->srr) {
+ inet_opt = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
+ if (inet_opt && inet_opt->opt.srr) {
if (!daddr)
return -EINVAL;
- nexthop = inet->opt->faddr;
- }
-
- tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
- IPPROTO_TCP,
- inet->inet_sport, usin->sin_port, sk, 1);
- if (tmp < 0) {
- if (tmp == -ENETUNREACH)
+ nexthop = inet_opt->opt.faddr;
+ }
+
+ orig_sport = inet->inet_sport;
+ orig_dport = usin->sin_port;
+ fl4 = &inet->cork.fl.u.ip4;
+ rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
+ RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+ IPPROTO_TCP,
+ orig_sport, orig_dport, sk, true);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ if (err == -ENETUNREACH)
IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
- return tmp;
+ return err;
}
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
@@ -182,11 +190,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -ENETUNREACH;
}
- if (!inet->opt || !inet->opt->srr)
- daddr = rt->rt_dst;
+ if (!inet_opt || !inet_opt->opt.srr)
+ daddr = fl4->daddr;
if (!inet->inet_saddr)
- inet->inet_saddr = rt->rt_src;
+ inet->inet_saddr = fl4->saddr;
inet->inet_rcv_saddr = inet->inet_saddr;
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
@@ -197,8 +205,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
if (tcp_death_row.sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
- struct inet_peer *peer = rt_get_peer(rt);
+ !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
+ struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
/*
* VJ's idea. We save last timestamp seen from
* the destination in peer table, when entering state
@@ -218,8 +226,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr = daddr;
inet_csk(sk)->icsk_ext_hdr_len = 0;
- if (inet->opt)
- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
+ if (inet_opt)
+ inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
@@ -233,11 +241,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (err)
goto failure;
- err = ip_route_newports(&rt, IPPROTO_TCP,
- inet->inet_sport, inet->inet_dport, sk);
- if (err)
+ rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
+ inet->inet_sport, inet->inet_dport, sk);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto failure;
-
+ }
/* OK, now commit destination to socket. */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->dst);
@@ -273,7 +283,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
/*
* This routine does path mtu discovery as defined in RFC1191.
*/
-static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
+static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
@@ -335,7 +345,7 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
- struct iphdr *iph = (struct iphdr *)icmp_skb->data;
+ const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
struct inet_connection_sock *icsk;
struct tcp_sock *tp;
@@ -415,6 +425,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
!icsk->icsk_backoff)
break;
+ if (sock_owned_by_user(sk))
+ break;
+
icsk->icsk_backoff--;
inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
icsk->icsk_backoff;
@@ -429,11 +442,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (remaining) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
remaining, TCP_RTO_MAX);
- } else if (sock_owned_by_user(sk)) {
- /* RTO revert clocked out retransmission,
- * but socket is locked. Will defer. */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- HZ/20, TCP_RTO_MAX);
} else {
/* RTO revert clocked out retransmission.
* Will retransmit now */
@@ -643,7 +651,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
net = dev_net(skb_dst(skb)->dev);
- ip_send_reply(net->ipv4.tcp_sock, skb,
+ ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
&arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -718,7 +726,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
if (oif)
arg.bound_dev_if = oif;
- ip_send_reply(net->ipv4.tcp_sock, skb,
+ ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
&arg, arg.iov[0].iov_len);
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
@@ -761,11 +769,12 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
struct request_values *rvp)
{
const struct inet_request_sock *ireq = inet_rsk(req);
+ struct flowi4 fl4;
int err = -1;
struct sk_buff * skb;
/* First, grab a route. */
- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
+ if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
skb = tcp_make_synack(sk, dst, req, rvp);
@@ -816,17 +825,18 @@ static void syn_flood_warning(const struct sk_buff *skb)
/*
* Save and compile IPv4 options into the request_sock if needed.
*/
-static struct ip_options *tcp_v4_save_options(struct sock *sk,
- struct sk_buff *skb)
+static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
+ struct sk_buff *skb)
{
- struct ip_options *opt = &(IPCB(skb)->opt);
- struct ip_options *dopt = NULL;
+ const struct ip_options *opt = &(IPCB(skb)->opt);
+ struct ip_options_rcu *dopt = NULL;
if (opt && opt->optlen) {
- int opt_size = optlength(opt);
+ int opt_size = sizeof(*dopt) + opt->optlen;
+
dopt = kmalloc(opt_size, GFP_ATOMIC);
if (dopt) {
- if (ip_options_echo(dopt, skb)) {
+ if (ip_options_echo(&dopt->opt, skb)) {
kfree(dopt);
dopt = NULL;
}
@@ -1212,12 +1222,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
};
#endif
-static struct timewait_sock_ops tcp_timewait_sock_ops = {
- .twsk_obj_size = sizeof(struct tcp_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
- .twsk_destructor= tcp_twsk_destructor,
-};
-
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_extend_values tmp_ext;
@@ -1335,6 +1339,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
req->cookie_ts = tmp_opt.tstamp_ok;
} else if (!isn) {
struct inet_peer *peer = NULL;
+ struct flowi4 fl4;
/* VJ's idea. We save last timestamp seen
* from the destination in peer table, when entering
@@ -1347,9 +1352,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
*/
if (tmp_opt.saw_tstamp &&
tcp_death_row.sysctl_tw_recycle &&
- (dst = inet_csk_route_req(sk, req)) != NULL &&
- (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
- peer->v4daddr == saddr) {
+ (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
+ fl4.daddr == saddr &&
+ (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
inet_peer_refcheck(peer);
if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
(s32)(peer->tcp_ts - req->ts_recent) >
@@ -1413,19 +1418,16 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct ip_options_rcu *inet_opt;
if (sk_acceptq_is_full(sk))
goto exit_overflow;
- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
- goto exit;
-
newsk = tcp_create_openreq_child(sk, req, skb);
if (!newsk)
goto exit_nonewsk;
newsk->sk_gso_type = SKB_GSO_TCPV4;
- sk_setup_caps(newsk, dst);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
@@ -1433,18 +1435,24 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->inet_daddr = ireq->rmt_addr;
newinet->inet_rcv_saddr = ireq->loc_addr;
newinet->inet_saddr = ireq->loc_addr;
- newinet->opt = ireq->opt;
+ inet_opt = ireq->opt;
+ rcu_assign_pointer(newinet->inet_opt, inet_opt);
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newinet->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
+ if (inet_opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies;
+ if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+ goto put_and_exit;
+
+ sk_setup_caps(newsk, dst);
+
tcp_mtup_init(newsk);
tcp_sync_mss(newsk, dst_mtu(dst));
- newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+ newtp->advmss = dst_metric_advmss(dst);
if (tcp_sk(sk)->rx_opt.user_mss &&
tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
@@ -1469,10 +1477,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
#endif
- if (__inet_inherit_port(sk, newsk) < 0) {
- sock_put(newsk);
- goto exit;
- }
+ if (__inet_inherit_port(sk, newsk) < 0)
+ goto put_and_exit;
__inet_hash_nolisten(newsk, NULL);
return newsk;
@@ -1484,6 +1490,9 @@ exit_nonewsk:
exit:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
+put_and_exit:
+ sock_put(newsk);
+ goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
@@ -1564,12 +1573,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
sock_rps_save_rxhash(sk, skb->rxhash);
- TCP_CHECK_TIMER(sk);
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
}
- TCP_CHECK_TIMER(sk);
return 0;
}
@@ -1591,13 +1598,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb->rxhash);
-
- TCP_CHECK_TIMER(sk);
if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
}
- TCP_CHECK_TIMER(sk);
return 0;
reset:
@@ -1765,64 +1769,41 @@ do_time_wait:
goto discard_it;
}
-/* VJ's idea. Save last timestamp seen from this destination
- * and hold it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter synchronized
- * state.
- */
-
-int tcp_v4_remember_stamp(struct sock *sk)
+struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
{
+ struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
struct inet_sock *inet = inet_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
- struct inet_peer *peer = NULL;
- int release_it = 0;
+ struct inet_peer *peer;
- if (!rt || rt->rt_dst != inet->inet_daddr) {
- peer = inet_getpeer(inet->inet_daddr, 1);
- release_it = 1;
+ if (!rt ||
+ inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
+ peer = inet_getpeer_v4(inet->inet_daddr, 1);
+ *release_it = true;
} else {
if (!rt->peer)
- rt_bind_peer(rt, 1);
+ rt_bind_peer(rt, inet->inet_daddr, 1);
peer = rt->peer;
+ *release_it = false;
}
- if (peer) {
- if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
- ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
- peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
- peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
- peer->tcp_ts = tp->rx_opt.ts_recent;
- }
- if (release_it)
- inet_putpeer(peer);
- return 1;
- }
-
- return 0;
+ return peer;
}
-EXPORT_SYMBOL(tcp_v4_remember_stamp);
+EXPORT_SYMBOL(tcp_v4_get_peer);
-int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
+void *tcp_v4_tw_get_peer(struct sock *sk)
{
- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
-
- if (peer) {
- const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-
- if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
- ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
- peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
- peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
- peer->tcp_ts = tcptw->tw_ts_recent;
- }
- inet_putpeer(peer);
- return 1;
- }
+ struct inet_timewait_sock *tw = inet_twsk(sk);
- return 0;
+ return inet_getpeer_v4(tw->tw_daddr, 1);
}
+EXPORT_SYMBOL(tcp_v4_tw_get_peer);
+
+static struct timewait_sock_ops tcp_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct tcp_timewait_sock),
+ .twsk_unique = tcp_twsk_unique,
+ .twsk_destructor= tcp_twsk_destructor,
+ .twsk_getpeer = tcp_v4_tw_get_peer,
+};
const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
@@ -1830,7 +1811,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.rebuild_header = inet_sk_rebuild_header,
.conn_request = tcp_v4_conn_request,
.syn_recv_sock = tcp_v4_syn_recv_sock,
- .remember_stamp = tcp_v4_remember_stamp,
+ .get_peer = tcp_v4_get_peer,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
@@ -2026,13 +2007,12 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
}
req = req->dl_next;
}
- st->offset = 0;
if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
break;
get_req:
req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
}
- sk = sk_next(st->syn_wait_sk);
+ sk = sk_nulls_next(st->syn_wait_sk);
st->state = TCP_SEQ_STATE_LISTENING;
read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
} else {
@@ -2041,11 +2021,13 @@ get_req:
if (reqsk_queue_len(&icsk->icsk_accept_queue))
goto start_req;
read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
- sk = sk_next(sk);
+ sk = sk_nulls_next(sk);
}
get_sk:
sk_nulls_for_each_from(sk, node) {
- if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (sk->sk_family == st->family) {
cur = sk;
goto out;
}
@@ -2557,7 +2539,7 @@ void tcp4_proc_exit(void)
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
- struct iphdr *iph = skb_gro_network_header(skb);
+ const struct iphdr *iph = skb_gro_network_header(skb);
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
@@ -2578,7 +2560,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
int tcp4_gro_complete(struct sk_buff *skb)
{
- struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),