diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 4 | ||||
-rw-r--r-- | net/ipv4/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/datagram.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 7 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 5 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 2 | ||||
-rw-r--r-- | net/ipv4/raw.c | 4 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 165 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 194 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 18 | ||||
-rw-r--r-- | net/ipv4/udp.c | 85 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 76 | ||||
-rw-r--r-- | net/ipv4/udp_tunnel.c | 100 |
16 files changed, 420 insertions, 249 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 05c57f0fcabe..dbc10d84161f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -307,6 +307,10 @@ config NET_IPVTI the notion of a secure tunnel for IPSEC and then use routing protocol on top. +config NET_UDP_TUNNEL + tristate + default n + config INET_AH tristate "IP: AH transformation" select XFRM_ALGO diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f032688d20d3..8ee1cd4053ee 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_NET_IPIP) += ipip.o gre-y := gre_demux.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o +obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index a3095fdefbed..90c0e8386116 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; + inet_set_txhash(sk); inet->inet_id = jiffies; sk_dst_set(sk, &rt->dst); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8d3b6b0e9857..b16556836d66 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -962,10 +962,6 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; - else - /* only the initial fragment is - time stamped */ - cork->tx_flags = 0; } if (skb == NULL) goto error; @@ -976,7 +972,10 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); + + /* only the initial fragment is time stamped */ skb_shinfo(skb)->tx_flags = cork->tx_flags; + cork->tx_flags = 0; /* * Find where to start putting bytes. diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6f9de61dce5f..dd8c8c765799 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -305,7 +305,7 @@ static struct net_device *__ip_tunnel_create(struct net *net, } ASSERT_RTNL(); - dev = alloc_netdev(ops->priv_size, name, ops->setup); + dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); if (!dev) { err = -ENOMEM; goto failed; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b3e86ea7b71b..5bbef4fdcb43 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -143,8 +143,6 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ -__be32 ic_dev_xid; /* Device under configuration */ - /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -654,6 +652,7 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, }; +static __be32 ic_dev_xid; /* Device under configuration */ /* * Initialize DHCP/BOOTP extension fields in the request. @@ -1218,10 +1217,10 @@ static int __init ic_dynamic(void) get_random_bytes(&timeout, sizeof(timeout)); timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { +#ifdef IPCONFIG_BOOTP /* Track the device we are configuring */ ic_dev_xid = d->xid; -#ifdef IPCONFIG_BOOTP if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 65bcaa789043..c8034587859d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -500,7 +500,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) else sprintf(name, "pimreg%u", mrt->id); - dev = alloc_netdev(0, name, reg_vif_setup); + dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); if (dev == NULL) return NULL; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c65160565e1..2054d7136c62 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -365,6 +365,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; + sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + skb->transport_header = skb->network_header; err = -EFAULT; if (memcpy_fromiovecend((void *)iph, from, 0, length)) @@ -606,6 +608,8 @@ back_from_confirm: &rt, msg->msg_flags); else { + sock_tx_timestamp(sk, &ipc.tx_flags); + if (!ipc.addr) ipc.addr = fl4.daddr; lock_sock(sk); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c86624b36a62..c0c75688896e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -170,7 +170,8 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); -__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb, + __u16 *mssp) { const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 40639c288dc2..7832d941dbcd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2475,7 +2475,7 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) * losses and/or application stalls), do not perform any further cwnd * reductions, but instead slow start up to ssthresh. */ -static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) +static void tcp_init_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2485,8 +2485,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; tp->prr_out = 0; - if (set_ssthresh) - tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); TCP_ECN_queue_cwr(tp); } @@ -2528,14 +2527,14 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) } /* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */ -void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) +void tcp_enter_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); tp->prior_ssthresh = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; - tcp_init_cwnd_reduction(sk, set_ssthresh); + tcp_init_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_CWR); } } @@ -2564,7 +2563,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) tp->retrans_stamp = 0; if (flag & FLAG_ECE) - tcp_enter_cwr(sk, 1); + tcp_enter_cwr(sk); if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); @@ -2670,7 +2669,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) tp->prior_ssthresh = tcp_current_ssthresh(sk); - tcp_init_cwnd_reduction(sk, true); + tcp_init_cwnd_reduction(sk); } tcp_set_ca_state(sk, TCP_CA_Recovery); } @@ -3346,7 +3345,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tp->tlp_high_seq = 0; /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ if (!(flag & FLAG_DSACKING_ACK)) { - tcp_init_cwnd_reduction(sk, true); + tcp_init_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); tcp_try_keep_open(sk); @@ -5877,3 +5876,153 @@ discard: return 0; } EXPORT_SYMBOL(tcp_rcv_state_process); + +static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + if (family == AF_INET) + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), + &ireq->ir_rmt_addr, port); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"), + &ireq->ir_v6_rmt_addr, port); +#endif +} + +int tcp_conn_request(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct sk_buff *skb) +{ + struct tcp_options_received tmp_opt; + struct request_sock *req; + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = NULL; + __u32 isn = TCP_SKB_CB(skb)->when; + bool want_cookie = false, fastopen; + struct flowi fl; + struct tcp_fastopen_cookie foc = { .len = -1 }; + int err; + + + /* TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if ((sysctl_tcp_syncookies == 2 || + inet_csk_reqsk_queue_is_full(sk)) && !isn) { + want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); + if (!want_cookie) + goto drop; + } + + + /* Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + goto drop; + } + + req = inet_reqsk_alloc(rsk_ops); + if (!req) + goto drop; + + tcp_rsk(req)->af_specific = af_ops; + + tcp_clear_options(&tmp_opt); + tmp_opt.mss_clamp = af_ops->mss_clamp; + tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); + + if (want_cookie && !tmp_opt.saw_tstamp) + tcp_clear_options(&tmp_opt); + + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + tcp_openreq_init(req, &tmp_opt, skb, sk); + + af_ops->init_req(req, sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + if (!want_cookie || tmp_opt.tstamp_ok) + TCP_ECN_create_request(req, skb, sock_net(sk)); + + if (want_cookie) { + isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; + } else if (!isn) { + /* VJ's idea. We save last timestamp seen + * from the destination in peer table, when entering + * state TIME-WAIT, and check against it before + * accepting new connection request. + * + * If "isn" is not zero, this request hit alive + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. + */ + if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { + bool strict; + + dst = af_ops->route_req(sk, &fl, req, &strict); + if (dst && strict && + !tcp_peer_is_proven(req, dst, true)) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + goto drop_and_release; + } + } + /* Kill the following clause, if you dislike this way. */ + else if (!sysctl_tcp_syncookies && + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (sysctl_max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst, false)) { + /* Without syncookies last quarter of + * backlog is filled with destinations, + * proven to be alive. + * It means that we continue to communicate + * to destinations, already remembered + * to the moment of synflood. + */ + pr_drop_req(req, ntohs(tcp_hdr(skb)->source), + rsk_ops->family); + goto drop_and_release; + } + + isn = af_ops->init_seq(skb); + } + if (!dst) { + dst = af_ops->route_req(sk, &fl, req, NULL); + if (!dst) + goto drop_and_free; + } + + tcp_rsk(req)->snt_isn = isn; + tcp_openreq_init_rwin(req, sk, dst); + fastopen = !want_cookie && + tcp_try_fastopen(sk, skb, req, &foc, dst); + err = af_ops->send_synack(sk, dst, &fl, req, + skb_get_queue_mapping(skb), &foc); + if (!fastopen) { + if (err || want_cookie) + goto drop_and_free; + + tcp_rsk(req)->listener = NULL; + af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + } + + return 0; + +drop_and_release: + dst_release(dst); +drop_and_free: + reqsk_free(req); +drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + return 0; +} +EXPORT_SYMBOL(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 77cccda1ad0c..1edc739b9da5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -99,7 +99,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) +static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, @@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; + inet_set_txhash(sk); + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; @@ -814,6 +816,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * socket. */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct flowi *fl, struct request_sock *req, u16 queue_mapping, struct tcp_fastopen_cookie *foc) @@ -837,24 +840,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); - if (!tcp_rsk(req)->snt_synack && !err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; } return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) -{ - int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL); - - if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - } - return res; -} - /* * IPv4 request_sock destructor. */ @@ -1237,161 +1227,68 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) #endif +static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(skb); +} + +static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict) +{ + struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); + + if (strict) { + if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) + *strict = true; + else + *strict = false; + } + + return dst; +} + struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { + .mss_clamp = TCP_MSS_DEFAULT, +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_reqsk_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, -}; #endif + .init_req = tcp_v4_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v4_init_sequence, +#endif + .route_req = tcp_v4_route_req, + .init_seq = tcp_v4_init_sequence, + .send_synack = tcp_v4_send_synack, + .queue_hash_add = inet_csk_reqsk_queue_hash_add, +}; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct inet_request_sock *ireq; - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = NULL; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; - __u32 isn = TCP_SKB_CB(skb)->when; - bool want_cookie = false, fastopen; - struct flowi4 fl4; - struct tcp_fastopen_cookie foc = { .len = -1 }; - int err; - /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); - if (!want_cookie) - goto drop; - } - - /* Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } - - req = inet_reqsk_alloc(&tcp_request_sock_ops); - if (!req) - goto drop; - -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; -#endif - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = TCP_MSS_DEFAULT; - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); - - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); - - ireq = inet_rsk(req); - ireq->ir_loc_addr = daddr; - ireq->ir_rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(skb); - ireq->ir_mark = inet_request_mark(sk, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - if (want_cookie) { - isn = cookie_v4_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - } else if (!isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && - fl4.daddr == saddr) { - if (!tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), - &saddr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } + return tcp_conn_request(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, skb); - isn = tcp_v4_init_sequence(skb); - } - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) - goto drop_and_free; - - tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_openreq_init_rwin(req, sk, dst); - fastopen = !want_cookie && - tcp_try_fastopen(sk, skb, req, &foc, dst); - err = tcp_v4_send_synack(sk, dst, req, - skb_get_queue_mapping(skb), &foc); - if (!fastopen) { - if (err || want_cookie) - goto drop_and_free; - - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_rsk(req)->listener = NULL; - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - } - - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; @@ -1439,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; + inet_set_txhash(newsk); if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e68e0d4af6c9..1649988bd1b6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -298,7 +298,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_flowlabel = np->flow_label >> 12; - tw->tw_ipv6only = np->ipv6only; + tw->tw_ipv6only = sk->sk_ipv6only; } #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 179b51e6bda3..8fcfc91964ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; skb->destructor = tcp_wfree; + skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ @@ -978,7 +979,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (likely(err <= 0)) return err; - tcp_enter_cwr(sk, 1); + tcp_enter_cwr(sk); return net_xmit_eval(err); } @@ -3301,3 +3302,18 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } } + +int tcp_rtx_synack(struct sock *sk, struct request_sock *req) +{ + const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; + struct flowi fl; + int res; + + res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); + if (!res) { + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } + return res; +} +EXPORT_SYMBOL(tcp_rtx_synack); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d5a8661df76..668af516f094 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -600,19 +600,18 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, int dif) { struct hlist_nulls_node *node; - struct sock *s = sk; unsigned short hnum = ntohs(loc_port); - sk_nulls_for_each_from(s, node) { - if (__udp_is_mcast_sock(net, s, + sk_nulls_for_each_from(sk, node) { + if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, rmt_port, rmt_addr, dif, hnum)) goto found; } - s = NULL; + sk = NULL; found: - return s; + return sk; } /* @@ -2526,79 +2525,3 @@ void __init udp_init(void) sysctl_udp_rmem_min = SK_MEM_QUANTUM; sysctl_udp_wmem_min = SK_MEM_QUANTUM; } - -struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - u16 mac_offset = skb->mac_header; - int mac_len = skb->mac_len; - int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - __be16 protocol = skb->protocol; - netdev_features_t enc_features; - int udp_offset, outer_hlen; - unsigned int oldlen; - bool need_csum; - - oldlen = (u16)~skb->len; - - if (unlikely(!pskb_may_pull(skb, tnl_hlen))) - goto out; - - skb->encapsulation = 0; - __skb_pull(skb, tnl_hlen); - skb_reset_mac_header(skb); - skb_set_network_header(skb, skb_inner_network_offset(skb)); - skb->mac_len = skb_inner_network_offset(skb); - skb->protocol = htons(ETH_P_TEB); - - need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); - if (need_csum) - skb->encap_hdr_csum = 1; - - /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); - segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) { - skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, - mac_len); - goto out; - } - - outer_hlen = skb_tnl_header_len(skb); - udp_offset = outer_hlen - tnl_hlen; - skb = segs; - do { - struct udphdr *uh; - int len; - - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - - skb->mac_len = mac_len; - - skb_push(skb, outer_hlen); - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); - skb_set_transport_header(skb, udp_offset); - len = skb->len - udp_offset; - uh = udp_hdr(skb); - uh->len = htons(len); - - if (need_csum) { - __be32 delta = htonl(oldlen + len); - - uh->check = ~csum_fold((__force __wsum) - ((__force u32)uh->check + - (__force u32)delta)); - uh->check = gso_make_checksum(skb, ~uh->check); - - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } - - skb->protocol = protocol; - } while ((skb = skb->next)); -out: - return segs; -} diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 546d2d439dda..4807544d018b 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -47,6 +47,82 @@ static int udp4_ufo_send_check(struct sk_buff *skb) return 0; } +struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; + int mac_len = skb->mac_len; + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + __be16 protocol = skb->protocol; + netdev_features_t enc_features; + int udp_offset, outer_hlen; + unsigned int oldlen; + bool need_csum; + + oldlen = (u16)~skb->len; + + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) + goto out; + + skb->encapsulation = 0; + __skb_pull(skb, tnl_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + skb->protocol = htons(ETH_P_TEB); + + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); + if (need_csum) + skb->encap_hdr_csum = 1; + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); + goto out; + } + + outer_hlen = skb_tnl_header_len(skb); + udp_offset = outer_hlen - tnl_hlen; + skb = segs; + do { + struct udphdr *uh; + int len; + + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + + skb->mac_len = mac_len; + + skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, udp_offset); + len = skb->len - udp_offset; + uh = udp_hdr(skb); + uh->len = htons(len); + + if (need_csum) { + __be32 delta = htonl(oldlen + len); + + uh->check = ~csum_fold((__force __wsum) + ((__force u32)uh->check + + (__force u32)delta)); + uh->check = gso_make_checksum(skb, ~uh->check); + + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } + + skb->protocol = protocol; + } while ((skb = skb->next)); +out: + return segs; +} + static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c new file mode 100644 index 000000000000..61ec1a65207e --- /dev/null +++ b/net/ipv4/udp_tunnel.c @@ -0,0 +1,100 @@ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/socket.h> +#include <linux/udp.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <net/udp.h> +#include <net/udp_tunnel.h> +#include <net/net_namespace.h> + +int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp) +{ + int err = -EINVAL; + struct socket *sock = NULL; + +#if IS_ENABLED(CONFIG_IPV6) + if (cfg->family == AF_INET6) { + struct sockaddr_in6 udp6_addr; + + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + if (err < 0) + goto error; + + sk_change_net(sock->sk, net); + + udp6_addr.sin6_family = AF_INET6; + memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, + sizeof(udp6_addr.sin6_addr)); + udp6_addr.sin6_port = cfg->local_udp_port; + err = kernel_bind(sock, (struct sockaddr *)&udp6_addr, + sizeof(udp6_addr)); + if (err < 0) + goto error; + + if (cfg->peer_udp_port) { + udp6_addr.sin6_family = AF_INET6; + memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, + sizeof(udp6_addr.sin6_addr)); + udp6_addr.sin6_port = cfg->peer_udp_port; + err = kernel_connect(sock, + (struct sockaddr *)&udp6_addr, + sizeof(udp6_addr), 0); + } + if (err < 0) + goto error; + + udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums); + udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums); + } else +#endif + if (cfg->family == AF_INET) { + struct sockaddr_in udp_addr; + + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); + if (err < 0) + goto error; + + sk_change_net(sock->sk, net); + + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->local_ip; + udp_addr.sin_port = cfg->local_udp_port; + err = kernel_bind(sock, (struct sockaddr *)&udp_addr, + sizeof(udp_addr)); + if (err < 0) + goto error; + + if (cfg->peer_udp_port) { + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->peer_ip; + udp_addr.sin_port = cfg->peer_udp_port; + err = kernel_connect(sock, + (struct sockaddr *)&udp_addr, + sizeof(udp_addr), 0); + if (err < 0) + goto error; + } + + sock->sk->sk_no_check_tx = !cfg->use_udp_checksums; + } else { + return -EPFNOSUPPORT; + } + + + *sockp = sock; + + return 0; + +error: + if (sock) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); + } + *sockp = NULL; + return err; +} +EXPORT_SYMBOL(udp_sock_create); + +MODULE_LICENSE("GPL"); |