summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/nf_conntrack_core.c18
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c69
-rw-r--r--net/netfilter/nf_conntrack_standalone.c10
-rw-r--r--net/netfilter/nf_flow_table_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c15
-rw-r--r--net/netfilter/nfnetlink_hook.c2
-rw-r--r--net/netfilter/nft_last.c32
-rw-r--r--net/netfilter/nft_nat.c4
11 files changed, 125 insertions, 54 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 96ba19fc8155..5c03e5106751 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -149,7 +149,15 @@ static void nf_conntrack_all_lock(void)
spin_lock(&nf_conntrack_locks_all_lock);
- nf_conntrack_locks_all = true;
+ /* For nf_contrack_locks_all, only the latest time when another
+ * CPU will see an update is controlled, by the "release" of the
+ * spin_lock below.
+ * The earliest time is not controlled, an thus KCSAN could detect
+ * a race when nf_conntract_lock() reads the variable.
+ * WRITE_ONCE() is used to ensure the compiler will not
+ * optimize the write.
+ */
+ WRITE_ONCE(nf_conntrack_locks_all, true);
for (i = 0; i < CONNTRACK_LOCKS; i++) {
spin_lock(&nf_conntrack_locks[i]);
@@ -662,8 +670,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
return false;
tstamp = nf_conn_tstamp_find(ct);
- if (tstamp && tstamp->stop == 0)
+ if (tstamp) {
+ s32 timeout = ct->timeout - nfct_time_stamp;
+
tstamp->stop = ktime_get_real_ns();
+ if (timeout < 0)
+ tstamp->stop -= jiffies_to_nsecs(-timeout);
+ }
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
@@ -2457,7 +2470,6 @@ i_see_dead_people:
}
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_conntrack_proto_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
free_percpu(net->ct.stat);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e1a9dba7077..e81af33b233b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -218,6 +218,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
if (!help)
return 0;
+ rcu_read_lock();
helper = rcu_dereference(help->helper);
if (!helper)
goto out;
@@ -233,9 +234,11 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
nla_nest_end(skb, nest_helper);
out:
+ rcu_read_unlock();
return 0;
nla_put_failure:
+ rcu_read_unlock();
return -1;
}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 55647409a9be..8f7a9837349c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -697,13 +697,6 @@ void nf_conntrack_proto_pernet_init(struct net *net)
#endif
}
-void nf_conntrack_proto_pernet_fini(struct net *net)
-{
-#ifdef CONFIG_NF_CT_PROTO_GRE
- nf_ct_gre_keymap_flush(net);
-#endif
-}
-
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index db11e403d818..728eeb0aea87 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -55,19 +55,6 @@ static inline struct nf_gre_net *gre_pernet(struct net *net)
return &net->ct.nf_ct_proto.gre;
}
-void nf_ct_gre_keymap_flush(struct net *net)
-{
- struct nf_gre_net *net_gre = gre_pernet(net);
- struct nf_ct_gre_keymap *km, *tmp;
-
- spin_lock_bh(&keymap_lock);
- list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
- list_del_rcu(&km->list);
- kfree_rcu(km, rcu);
- }
- spin_unlock_bh(&keymap_lock);
-}
-
static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
const struct nf_conntrack_tuple *t)
{
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index f7e8baf59b51..3259416f2ea4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -823,6 +823,22 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
+static bool tcp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.tcp.state) {
+ case TCP_CONNTRACK_FIN_WAIT:
+ case TCP_CONNTRACK_LAST_ACK:
+ case TCP_CONNTRACK_TIME_WAIT:
+ case TCP_CONNTRACK_CLOSE:
+ case TCP_CONNTRACK_CLOSE_WAIT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -1030,10 +1046,30 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
if (index != TCP_RST_SET)
break;
- if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
+ /* If we are closing, tuple might have been re-used already.
+ * last_index, last_ack, and all other ct fields used for
+ * sequence/window validation are outdated in that case.
+ *
+ * As the conntrack can already be expired by GC under pressure,
+ * just skip validation checks.
+ */
+ if (tcp_can_early_drop(ct))
+ goto in_window;
+
+ /* td_maxack might be outdated if we let a SYN through earlier */
+ if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) &&
+ ct->proto.tcp.last_index != TCP_SYN_SET) {
u32 seq = ntohl(th->seq);
- if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
+ /* If we are not in established state and SEQ=0 this is most
+ * likely an answer to a SYN we let go through above (last_index
+ * can be updated due to out-of-order ACKs).
+ */
+ if (seq == 0 && !nf_conntrack_tcp_established(ct))
+ break;
+
+ if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) &&
+ !tn->tcp_ignore_invalid_rst) {
/* Invalid RST */
spin_unlock_bh(&ct->lock);
nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst");
@@ -1134,6 +1170,16 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_ACCEPT;
}
+
+ if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) {
+ /* do not renew timeout on SYN retransmit.
+ *
+ * Else port reuse by client or NAT middlebox can keep
+ * entry alive indefinitely (including nat info).
+ */
+ return NF_ACCEPT;
+ }
+
/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
* pickup with loose=1. Avoid large ESTABLISHED timeout.
*/
@@ -1155,22 +1201,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-static bool tcp_can_early_drop(const struct nf_conn *ct)
-{
- switch (ct->proto.tcp.state) {
- case TCP_CONNTRACK_FIN_WAIT:
- case TCP_CONNTRACK_LAST_ACK:
- case TCP_CONNTRACK_TIME_WAIT:
- case TCP_CONNTRACK_CLOSE:
- case TCP_CONNTRACK_CLOSE_WAIT:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1437,6 +1467,9 @@ void nf_conntrack_tcp_init_net(struct net *net)
*/
tn->tcp_be_liberal = 0;
+ /* If it's non-zero, we turn off RST sequence number check */
+ tn->tcp_ignore_invalid_rst = 0;
+
/* Max number of the retransmitted packets without receiving an (acceptable)
* ACK from the destination. If this number is reached, a shorter timer
* will be started.
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index f57a951c9b5e..214d9f9e499b 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -579,6 +579,7 @@ enum nf_ct_sysctl_index {
#endif
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+ NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST,
NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
@@ -798,6 +799,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = {
+ .procname = "nf_conntrack_tcp_ignore_invalid_rst",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
[NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
.procname = "nf_conntrack_tcp_max_retrans",
.maxlen = sizeof(u8),
@@ -1004,6 +1013,7 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
XASSIGN(LOOSE, &tn->tcp_loose);
XASSIGN(LIBERAL, &tn->tcp_be_liberal);
XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
+ XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst);
#undef XASSIGN
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 1e50908b1b7e..551976e4284c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -331,7 +331,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
void flow_offload_refresh(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
- flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ u32 timeout;
+
+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ if (READ_ONCE(flow->timeout) != timeout)
+ WRITE_ONCE(flow->timeout, timeout);
if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 390d4466567f..081437dd75b7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3446,7 +3446,8 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return 0;
err_destroy_flow_rule:
- nft_flow_rule_destroy(flow);
+ if (flow)
+ nft_flow_rule_destroy(flow);
err_release_rule:
nf_tables_rule_release(&ctx, rule);
err_release_expr:
@@ -8444,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl,
return 0;
}
+static void nf_tables_commit_audit_free(struct list_head *adl)
+{
+ struct nft_audit_data *adp, *adn;
+
+ list_for_each_entry_safe(adp, adn, adl, list) {
+ list_del(&adp->list);
+ kfree(adp);
+ }
+}
+
static void nf_tables_commit_audit_collect(struct list_head *adl,
struct nft_table *table, u32 op)
{
@@ -8508,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
if (ret) {
nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
return ret;
}
if (trans->msg_type == NFT_MSG_NEWRULE ||
@@ -8517,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
ret = nf_tables_commit_chain_prepare(net, chain);
if (ret < 0) {
nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
return ret;
}
}
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 50b4e3c9347a..202f57d17bab 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -174,7 +174,9 @@ static const struct nf_hook_entries *
nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
{
const struct nf_hook_entries *hook_head = NULL;
+#ifdef CONFIG_NETFILTER_INGRESS
struct net_device *netdev;
+#endif
switch (pf) {
case NFPROTO_IPV4:
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 913ac45167f2..304e33cbed9b 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -23,15 +23,21 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
{
struct nft_last_priv *priv = nft_expr_priv(expr);
u64 last_jiffies;
+ u32 last_set = 0;
int err;
- if (tb[NFTA_LAST_MSECS]) {
+ if (tb[NFTA_LAST_SET]) {
+ last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
+ if (last_set == 1)
+ priv->last_set = 1;
+ }
+
+ if (last_set && tb[NFTA_LAST_MSECS]) {
err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
if (err < 0)
return err;
- priv->last_jiffies = jiffies + (unsigned long)last_jiffies;
- priv->last_set = 1;
+ priv->last_jiffies = jiffies - (unsigned long)last_jiffies;
}
return 0;
@@ -42,24 +48,30 @@ static void nft_last_eval(const struct nft_expr *expr,
{
struct nft_last_priv *priv = nft_expr_priv(expr);
- priv->last_jiffies = jiffies;
- priv->last_set = 1;
+ if (READ_ONCE(priv->last_jiffies) != jiffies)
+ WRITE_ONCE(priv->last_jiffies, jiffies);
+ if (READ_ONCE(priv->last_set) == 0)
+ WRITE_ONCE(priv->last_set, 1);
}
static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_last_priv *priv = nft_expr_priv(expr);
+ unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
+ u32 last_set = READ_ONCE(priv->last_set);
__be64 msecs;
- if (time_before(jiffies, priv->last_jiffies))
- priv->last_set = 0;
+ if (time_before(jiffies, last_jiffies)) {
+ WRITE_ONCE(priv->last_set, 0);
+ last_set = 0;
+ }
- if (priv->last_set)
- msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies);
+ if (last_set)
+ msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies);
else
msecs = 0;
- if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) ||
+ if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) ||
nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
goto nla_put_failure;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 0840c635b752..be1595d6979d 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
break;
default:
- return -EAFNOSUPPORT;
+ if (tb[NFTA_NAT_REG_ADDR_MIN])
+ return -EAFNOSUPPORT;
+ break;
}
priv->family = family;