From b515d2637276a3810d6595e10ab02c13bfd0b63a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 16 Apr 2021 11:27:59 +0200 Subject: xfrm: xfrm_state_mtu should return at least 1280 for ipv6 Jianwen reported that IPv6 Interoperability tests are failing in an IPsec case where one of the links between the IPsec peers has an MTU of 1280. The peer generates a packet larger than this MTU, the router replies with a "Packet too big" message indicating an MTU of 1280. When the peer tries to send another large packet, xfrm_state_mtu returns 1280 - ipsec_overhead, which causes ip6_setup_cork to fail with EINVAL. We can fix this by forcing xfrm_state_mtu to return IPV6_MIN_MTU when IPv6 is used. After going through IPsec, the packet will then be fragmented to obey the actual network's PMTU, just before leaving the host. Currently, TFC padding is capped to PMTU - overhead to avoid fragementation: after padding and encapsulation, we still fit within the PMTU. That behavior is preserved in this patch. Fixes: 91657eafb64b ("xfrm: take net hdr len into account for esp payload size calculation") Reported-by: Jianwen Ji Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net/xfrm') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4496f7efa220..c25586156c6a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2518,7 +2518,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; @@ -2549,7 +2549,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - net_adj) & ~(blksize - 1)) + net_adj - 2; } -EXPORT_SYMBOL_GPL(xfrm_state_mtu); +EXPORT_SYMBOL_GPL(__xfrm_state_mtu); + +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) +{ + mtu = __xfrm_state_mtu(x, mtu); + + if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU) + return IPV6_MIN_MTU; + + return mtu; +} int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { -- cgit v1.2.3 From d7b0408934c749f546b01f2b33d07421a49b6f3e Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Fri, 28 May 2021 18:04:06 +0200 Subject: xfrm: policy: Read seqcount outside of rcu-read side in xfrm_policy_lookup_bytype xfrm_policy_lookup_bytype loops on seqcount mutex xfrm_policy_hash_generation within an RCU read side critical section. Although ill advised, this is fine if the loop is bounded. xfrm_policy_hash_generation wraps mutex hash_resize_mutex, which is used to serialize writers (xfrm_hash_resize, xfrm_hash_rebuild). This is fine too. On PREEMPT_RT=y, the read_seqcount_begin call within xfrm_policy_lookup_bytype emits a mutex lock/unlock for hash_resize_mutex. Mutex locking is fine, since RCU read side critical sections are allowed to sleep with PREEMPT_RT. xfrm_hash_resize can, however, block on synchronize_rcu while holding hash_resize_mutex. This leads to the following situation on PREEMPT_RT, where the writer is blocked on RCU grace period expiry, while the reader is blocked on a lock held by the writer: Thead 1 (xfrm_hash_resize) Thread 2 (xfrm_policy_lookup_bytype) rcu_read_lock(); mutex_lock(&hash_resize_mutex); read_seqcount_begin(&xfrm_policy_hash_generation); mutex_lock(&hash_resize_mutex); // block xfrm_bydst_resize(); synchronize_rcu(); // block Move the read_seqcount_begin call outside of the RCU read side critical section, and do an rcu_read_unlock/retry if we got stale data within the critical section. On non-PREEMPT_RT, this shortens the time spent within RCU read side critical section in case the seqcount needs a retry, and avoids unbounded looping. Fixes: 77cc278f7b20 ("xfrm: policy: Use sequence counters with associated lock") Signed-off-by: Varad Gautam Cc: linux-rt-users Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org # v4.9 Cc: Steffen Klassert Cc: Herbert Xu Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Florian Westphal Cc: "Ahmed S. Darwish" Signed-off-by: Steffen Klassert Acked-by: Ahmed S. Darwish --- net/xfrm/xfrm_policy.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net/xfrm') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b74f28cabe24..8c56e3e59c3c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2092,12 +2092,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - rcu_read_lock(); retry: - do { - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); - chain = policy_hash_direct(net, daddr, saddr, family, dir); - } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + rcu_read_lock(); + + chain = policy_hash_direct(net, daddr, saddr, family, dir); + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { + rcu_read_unlock(); + goto retry; + } ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2128,11 +2131,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { + rcu_read_unlock(); goto retry; + } - if (ret && !xfrm_pol_hold_rcu(ret)) + if (ret && !xfrm_pol_hold_rcu(ret)) { + rcu_read_unlock(); goto retry; + } fail: rcu_read_unlock(); -- cgit v1.2.3 From eebd49a4ffb420a991c606e54aa3c9f02857a334 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 29 May 2021 16:23:18 -0400 Subject: xfrm: remove the fragment check for ipv6 beet mode In commit 68dc022d04eb ("xfrm: BEET mode doesn't support fragments for inner packets"), it tried to fix the issue that in TX side the packet is fragmented before the ESP encapping while in the RX side the fragments always get reassembled before decapping with ESP. This is not true for IPv6. IPv6 is different, and it's using exthdr to save fragment info, as well as the ESP info. Exthdrs are added in TX and processed in RX both in order. So in the above case, the ESP decapping will be done earlier than the fragment reassembling in TX side. Here just remove the fragment check for the IPv6 inner packets to recover the fragments support for BEET mode. Fixes: 68dc022d04eb ("xfrm: BEET mode doesn't support fragments for inner packets") Reported-by: Xiumei Mu Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net/xfrm') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index e4cb0ff4dcf4..ac907b9d32d1 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -711,15 +711,8 @@ out: static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) - unsigned int ptr = 0; int err; - if (x->outer_mode.encap == XFRM_MODE_BEET && - ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL) >= 0) { - net_warn_ratelimited("BEET mode doesn't support inner IPv6 fragments\n"); - return -EAFNOSUPPORT; - } - err = xfrm6_tunnel_check_size(skb); if (err) return err; -- cgit v1.2.3 From 6fd06963fa74197103cdbb4b494763127b3f2f34 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 7 Jun 2021 15:21:49 +0200 Subject: xfrm: Fix error reporting in xfrm_state_construct. When memory allocation for XFRMA_ENCAP or XFRMA_COADDR fails, the error will not be reported because the -ENOMEM assignment to the err variable is overwritten before. Fix this by moving these two in front of the function so that memory allocation failures will be reported. Reported-by: Tobias Brunner Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net/xfrm') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5a0ef4361e43..817e714dedea 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -580,6 +580,20 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, copy_from_user_state(x, p); + if (attrs[XFRMA_ENCAP]) { + x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), + sizeof(*x->encap), GFP_KERNEL); + if (x->encap == NULL) + goto error; + } + + if (attrs[XFRMA_COADDR]) { + x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), + sizeof(*x->coaddr), GFP_KERNEL); + if (x->coaddr == NULL) + goto error; + } + if (attrs[XFRMA_SA_EXTRA_FLAGS]) x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); @@ -600,23 +614,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, attrs[XFRMA_ALG_COMP]))) goto error; - if (attrs[XFRMA_ENCAP]) { - x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), - sizeof(*x->encap), GFP_KERNEL); - if (x->encap == NULL) - goto error; - } - if (attrs[XFRMA_TFCPAD]) x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); - if (attrs[XFRMA_COADDR]) { - x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), - sizeof(*x->coaddr), GFP_KERNEL); - if (x->coaddr == NULL) - goto error; - } - xfrm_mark_get(attrs, &x->mark); xfrm_smark_init(attrs, &x->props.smark); -- cgit v1.2.3 From dd72fadf2186fc8a6018f97fe72f4d5ca05df440 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Tue, 22 Jun 2021 09:25:31 +0530 Subject: xfrm: Fix xfrm offload fallback fail case In case of xfrm offload, if xdo_dev_state_add() of driver returns -EOPNOTSUPP, xfrm offload fallback is failed. In xfrm state_add() both xso->dev and xso->real_dev are initialized to dev and when err(-EOPNOTSUPP) is returned only xso->dev is set to null. So in this scenario the condition in func validate_xmit_xfrm(), if ((x->xso.dev != dev) && (x->xso.real_dev == dev)) return skb; returns true, due to which skb is returned without calling esp_xmit() below which has fallback code. Hence the CRYPTO_FALLBACK is failing. So fixing this with by keeping x->xso.real_dev as NULL when err is returned in func xfrm_dev_state_add(). Fixes: bdfd2d1fa79a ("bonding/xfrm: use real_dev instead of slave_dev") Signed-off-by: Ayush Sawal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/xfrm') diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6d6917b68856..e843b0d9e2a6 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -268,6 +268,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->num_exthdrs = 0; xso->flags = 0; xso->dev = NULL; + xso->real_dev = NULL; dev_put(dev); if (err != -EOPNOTSUPP) -- cgit v1.2.3