From d75fe63a0708bd28eac5cda1212fa5fd037297dc Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 24 Feb 2024 16:41:21 +0800 Subject: ipv6: raw: remove useless input parameter in rawv6_err The input parameter 'opt' in rawv6_err() is not used. Therefore, remove it. Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240224084121.2479603-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/raw.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/ipv6/raw.c') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 03dbb874c363..479c4c2c1785 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -288,8 +288,7 @@ out: } static void rawv6_err(struct sock *sk, struct sk_buff *skb, - struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -344,7 +343,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr, inet6_iif(skb), inet6_iif(skb))) continue; - rawv6_err(sk, skb, NULL, type, code, inner_offset, info); + rawv6_err(sk, skb, type, code, inner_offset, info); } rcu_read_unlock(); } -- cgit v1.2.3 From 2e26b6dfade4db750809b9231cce49498bb71767 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 27 Feb 2024 08:57:45 +0800 Subject: ipv6: raw: remove useless input parameter in rawv6_get/seticmpfilter The input parameter 'level' in rawv6_get/seticmpfilter is not used. Therefore, remove it. Signed-off-by: Zhengchao Shao Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv6/raw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6/raw.c') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 479c4c2c1785..76e6eb3b643d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -934,7 +934,7 @@ do_confirm: goto done; } -static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, +static int rawv6_seticmpfilter(struct sock *sk, int optname, sockptr_t optval, int optlen) { switch (optname) { @@ -951,7 +951,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, return 0; } -static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, +static int rawv6_geticmpfilter(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int len; @@ -1037,7 +1037,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; - return rawv6_seticmpfilter(sk, level, optname, optval, optlen); + return rawv6_seticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) @@ -1098,7 +1098,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; - return rawv6_geticmpfilter(sk, level, optname, optval, optlen); + return rawv6_geticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) -- cgit v1.2.3 From 885c36e59f46375c138de18ff1692f18eff67b7f Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Fri, 1 Mar 2024 12:13:48 -0800 Subject: net: Re-use and set mono_delivery_time bit for userspace tstamp packets Bridge driver today has no support to forward the userspace timestamp packets and ends up resetting the timestamp. ETF qdisc checks the packet coming from userspace and encounters to be 0 thereby dropping time sensitive packets. These changes will allow userspace timestamps packets to be forwarded from the bridge to NIC drivers. Setting the same bit (mono_delivery_time) to avoid dropping of userspace tstamp packets in the forwarding path. Existing functionality of mono_delivery_time remains unaltered here, instead just extended with userspace tstamp support for bridge forwarding path. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240301201348.2815102-1-quic_abchauha@quicinc.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 6 +++--- net/ipv4/ip_output.c | 1 + net/ipv4/raw.c | 1 + net/ipv6/ip6_output.c | 2 +- net/ipv6/raw.c | 2 +- net/packet/af_packet.c | 4 +++- 6 files changed, 10 insertions(+), 6 deletions(-) (limited to 'net/ipv6/raw.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d577e0bee18d..3013355b63f5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -822,9 +822,9 @@ typedef unsigned char *sk_buff_data_t; * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * delivery_time in mono clock base (i.e., EDT) or a clock base chosen + * by SO_TXTIME. If zero, skb->tstamp has the (rcv) timestamp at + * ingress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1fe794967211..33f93dc730a3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1458,6 +1458,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; skb->tstamp = cork->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a82fd102df05..494a6284bd7e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -353,6 +353,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b9dd3a66e423..02eeca5492cd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1925,7 +1925,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 76e6eb3b643d..779274055abf 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -615,7 +615,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; skb_put(skb, length); skb_reset_network_header(skb); iph = ipv6_hdr(skb); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9bbc2686690..0db31ca4982d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2057,7 +2057,7 @@ retry: skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2586,6 +2586,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3064,6 +3065,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; + skb->mono_delivery_time = !!skb->tstamp; if (unlikely(extra_len == 4)) skb->no_fcs = 1; -- cgit v1.2.3 From 026763ece881b4c636173c25d012cde085689027 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Mar 2024 16:29:43 +0000 Subject: ipv6: raw: check sk->sk_rcvbuf earlier There is no point cloning an skb and having to free the clone if the receive queue of the raw socket is full. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240307162943.2523817-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/raw.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/ipv6/raw.c') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 779274055abf..ca49e6617afa 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -160,6 +160,13 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, inet6_iif(skb), inet6_sdif(skb))) continue; + + if (atomic_read(&sk->sk_rmem_alloc) >= + READ_ONCE(sk->sk_rcvbuf)) { + atomic_inc(&sk->sk_drops); + continue; + } + delivered = true; switch (nexthdr) { case IPPROTO_ICMPV6: -- cgit v1.2.3 From 35c3e27917568192927c785fc380f139255468b4 Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 14 Mar 2024 12:24:04 -0700 Subject: Revert "net: Re-use and set mono_delivery_time bit for userspace tstamp packets" This reverts commit 885c36e59f46375c138de18ff1692f18eff67b7f. The patch currently broke the bpf selftest test_tc_dtime because uapi field __sk_buff->tstamp_type depends on skb->mono_delivery_time which does not necessarily mean mono with the original fix as the bit was re-used for userspace timestamp as well to avoid tstamp reset in the forwarding path. To solve this we need to keep mono_delivery_time as is and introduce another bit called user_delivery_time and fall back to the initial proposal of setting the user_delivery_time bit based on sk_clockid set from userspace. Fixes: 885c36e59f46 ("net: Re-use and set mono_delivery_time bit for userspace tstamp packets") Link: https://lore.kernel.org/netdev/bc037db4-58bb-4861-ac31-a361a93841d3@linux.dev/ Signed-off-by: Abhishek Chauhan Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- net/ipv4/ip_output.c | 1 - net/ipv4/raw.c | 1 - net/ipv6/ip6_output.c | 2 +- net/ipv6/raw.c | 2 +- net/packet/af_packet.c | 4 +--- 6 files changed, 6 insertions(+), 10 deletions(-) (limited to 'net/ipv6/raw.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3023bc2be6a1..7d56ce195120 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -822,9 +822,9 @@ typedef unsigned char *sk_buff_data_t; * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e., EDT) or a clock base chosen - * by SO_TXTIME. If zero, skb->tstamp has the (rcv) timestamp at - * ingress. + * delivery_time in mono clock base (i.e. EDT). Otherwise, the + * skb->tstamp has the (rcv) timestamp at ingress and + * delivery_time at egress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 33f93dc730a3..1fe794967211 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1458,7 +1458,6 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; skb->tstamp = cork->transmit_time; - skb->mono_delivery_time = !!skb->tstamp; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 42ac434cfcfa..12b3740393ba 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -360,7 +360,6 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; - skb->mono_delivery_time = !!skb->tstamp; skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 02eeca5492cd..b9dd3a66e423 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1925,7 +1925,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; - skb->mono_delivery_time = !!skb->tstamp; + ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ca49e6617afa..0d896ca7b589 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -622,7 +622,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; - skb->mono_delivery_time = !!skb->tstamp; + skb_put(skb, length); skb_reset_network_header(skb); iph = ipv6_hdr(skb); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7cfc7d301508..18f616f487ea 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2057,7 +2057,7 @@ retry: skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; - skb->mono_delivery_time = !!skb->tstamp; + skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2586,7 +2586,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; - skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3065,7 +3064,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; - skb->mono_delivery_time = !!skb->tstamp; if (unlikely(extra_len == 4)) skb->no_fcs = 1; -- cgit v1.2.3