diff options
Diffstat (limited to 'net')
158 files changed, 1725 insertions, 1052 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e45187b88220..41be38264493 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -131,7 +131,8 @@ int vlan_check_real_dev(struct net_device *real_dev, { const char *name = real_dev->name; - if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { + if (real_dev->features & NETIF_F_VLAN_CHALLENGED || + real_dev->type != ARPHRD_ETHER) { pr_info("VLANs not supported on %s\n", name); NL_SET_ERR_MSG_MOD(extack, "VLANs not supported on device"); return -EOPNOTSUPP; diff --git a/net/atm/lec.c b/net/atm/lec.c index ffef658862db..a948dd47c3f3 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -181,6 +181,7 @@ static void lec_send(struct atm_vcc *vcc, struct sk_buff *skb) { struct net_device *dev = skb->dev; + unsigned int len = skb->len; ATM_SKB(skb)->vcc = vcc; atm_account_tx(vcc, skb); @@ -191,7 +192,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) } dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + dev->stats.tx_bytes += len; } static void lec_tx_timeout(struct net_device *dev, unsigned int txqueue) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index aa6c714892ec..9f3b8b682adb 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -685,6 +685,15 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } + if (ax25->ax25_dev) { + if (dev == ax25->ax25_dev->dev) { + rcu_read_unlock(); + break; + } + netdev_put(ax25->ax25_dev->dev, &ax25->dev_tracker); + ax25_dev_put(ax25->ax25_dev); + } + ax25->ax25_dev = ax25_dev_ax25dev(dev); if (!ax25->ax25_dev) { rcu_read_unlock(); @@ -692,6 +701,8 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } ax25_fillin_cb(ax25, ax25->ax25_dev); + netdev_hold(dev, &ax25->dev_tracker, GFP_ATOMIC); + ax25_dev_hold(ax25->ax25_dev); rcu_read_unlock(); break; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 07ae5dd1f150..b12645949ae5 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -325,8 +325,7 @@ batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, /* check if there is enough space for the optional TVLV */ next_buff_pos += ntohs(ogm_packet->tvlv_len); - return (next_buff_pos <= packet_len) && - (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); + return next_buff_pos <= packet_len; } /* send a batman ogm to a given interface */ diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index ac11f1f08db0..d35479c465e2 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -113,8 +113,6 @@ static void batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) { ewma_throughput_init(&hardif_neigh->bat_v.throughput); - INIT_WORK(&hardif_neigh->bat_v.metric_work, - batadv_v_elp_throughput_metric_update); } /** diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 1d704574e6bf..b065578b4436 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -18,6 +18,7 @@ #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kref.h> +#include <linux/list.h> #include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/nl80211.h> @@ -26,6 +27,7 @@ #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> @@ -42,6 +44,18 @@ #include "send.h" /** + * struct batadv_v_metric_queue_entry - list of hardif neighbors which require + * and metric update + */ +struct batadv_v_metric_queue_entry { + /** @hardif_neigh: hardif neighbor scheduled for metric update */ + struct batadv_hardif_neigh_node *hardif_neigh; + + /** @list: list node for metric_queue */ + struct list_head list; +}; + +/** * batadv_v_elp_start_timer() - restart timer for ELP periodic work * @hard_iface: the interface for which the timer has to be reset */ @@ -59,25 +73,36 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) /** * batadv_v_elp_get_throughput() - get the throughput towards a neighbour * @neigh: the neighbour for which the throughput has to be obtained + * @pthroughput: calculated throughput towards the given neighbour in multiples + * of 100kpbs (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). * - * Return: The throughput towards the given neighbour in multiples of 100kpbs - * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). + * Return: true when value behind @pthroughput was set */ -static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) +static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, + u32 *pthroughput) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; + struct net_device *soft_iface = hard_iface->soft_iface; struct ethtool_link_ksettings link_settings; struct net_device *real_netdev; struct station_info sinfo; u32 throughput; int ret; + /* don't query throughput when no longer associated with any + * batman-adv interface + */ + if (!soft_iface) + return false; + /* if the user specified a customised value for this interface, then * return it directly */ throughput = atomic_read(&hard_iface->bat_v.throughput_override); - if (throughput != 0) - return throughput; + if (throughput != 0) { + *pthroughput = throughput; + return true; + } /* if this is a wireless device, then ask its throughput through * cfg80211 API @@ -104,27 +129,39 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) * possible to delete this neighbor. For now set * the throughput metric to 0. */ - return 0; + *pthroughput = 0; + return true; } if (ret) goto default_throughput; - if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) - return sinfo.expected_throughput / 100; + if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) { + *pthroughput = sinfo.expected_throughput / 100; + return true; + } /* try to estimate the expected throughput based on reported tx * rates */ - if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) - return cfg80211_calculate_bitrate(&sinfo.txrate) / 3; + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { + *pthroughput = cfg80211_calculate_bitrate(&sinfo.txrate) / 3; + return true; + } goto default_throughput; } + /* only use rtnl_trylock because the elp worker will be cancelled while + * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise + * wait forever when the elp work_item was started and it is then also + * trying to rtnl_lock + */ + if (!rtnl_trylock()) + return false; + /* if not a wifi interface, check if this device provides data via * ethtool (e.g. an Ethernet adapter) */ - rtnl_lock(); ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings); rtnl_unlock(); if (ret == 0) { @@ -135,13 +172,15 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; throughput = link_settings.base.speed; - if (throughput && throughput != SPEED_UNKNOWN) - return throughput * 10; + if (throughput && throughput != SPEED_UNKNOWN) { + *pthroughput = throughput * 10; + return true; + } } default_throughput: if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) { - batadv_info(hard_iface->soft_iface, + batadv_info(soft_iface, "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n", hard_iface->net_dev->name, BATADV_THROUGHPUT_DEFAULT_VALUE / 10, @@ -150,31 +189,26 @@ default_throughput: } /* if none of the above cases apply, return the base_throughput */ - return BATADV_THROUGHPUT_DEFAULT_VALUE; + *pthroughput = BATADV_THROUGHPUT_DEFAULT_VALUE; + return true; } /** * batadv_v_elp_throughput_metric_update() - worker updating the throughput * metric of a single hop neighbour - * @work: the work queue item + * @neigh: the neighbour to probe */ -void batadv_v_elp_throughput_metric_update(struct work_struct *work) +static void +batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh) { - struct batadv_hardif_neigh_node_bat_v *neigh_bat_v; - struct batadv_hardif_neigh_node *neigh; - - neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v, - metric_work); - neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node, - bat_v); + u32 throughput; + bool valid; - ewma_throughput_add(&neigh->bat_v.throughput, - batadv_v_elp_get_throughput(neigh)); + valid = batadv_v_elp_get_throughput(neigh, &throughput); + if (!valid) + return; - /* decrement refcounter to balance increment performed before scheduling - * this task - */ - batadv_hardif_neigh_put(neigh); + ewma_throughput_add(&neigh->bat_v.throughput, throughput); } /** @@ -248,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) */ static void batadv_v_elp_periodic_work(struct work_struct *work) { + struct batadv_v_metric_queue_entry *metric_entry; + struct batadv_v_metric_queue_entry *metric_safe; struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_hard_iface *hard_iface; struct batadv_hard_iface_bat_v *bat_v; struct batadv_elp_packet *elp_packet; + struct list_head metric_queue; struct batadv_priv *bat_priv; struct sk_buff *skb; u32 elp_interval; - bool ret; bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); @@ -291,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) atomic_inc(&hard_iface->bat_v.elp_seqno); + INIT_LIST_HEAD(&metric_queue); + /* The throughput metric is updated on each sent packet. This way, if a * node is dead and no longer sends packets, batman-adv is still able to * react timely to its death. @@ -315,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) /* Reading the estimated throughput from cfg80211 is a task that * may sleep and that is not allowed in an rcu protected - * context. Therefore schedule a task for that. + * context. Therefore add it to metric_queue and process it + * outside rcu protected context. */ - ret = queue_work(batadv_event_workqueue, - &hardif_neigh->bat_v.metric_work); - - if (!ret) + metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC); + if (!metric_entry) { batadv_hardif_neigh_put(hardif_neigh); + continue; + } + + metric_entry->hardif_neigh = hardif_neigh; + list_add(&metric_entry->list, &metric_queue); } rcu_read_unlock(); + list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) { + batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh); + + batadv_hardif_neigh_put(metric_entry->hardif_neigh); + list_del(&metric_entry->list); + kfree(metric_entry); + } + restart_timer: batadv_v_elp_start_timer(hard_iface); out: diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index 9e2740195fa2..c9cb0a307100 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -10,7 +10,6 @@ #include "main.h" #include <linux/skbuff.h> -#include <linux/workqueue.h> int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface); void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface); @@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface); int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming); -void batadv_v_elp_throughput_metric_update(struct work_struct *work); #endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */ diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e503ee0d896b..8f89ffe6020c 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -839,8 +839,7 @@ batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, /* check if there is enough space for the optional TVLV */ next_buff_pos += ntohs(ogm2_packet->tvlv_len); - return (next_buff_pos <= packet_len) && - (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); + return next_buff_pos <= packet_len; } /** diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 3c0a14a582e4..d4b71d34310f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3937,23 +3937,21 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tt_data; u16 num_entries, num_vlan; - size_t flex_size; + size_t tt_data_sz; if (tvlv_value_len < sizeof(*tt_data)) return; tt_data = tvlv_value; - tvlv_value_len -= sizeof(*tt_data); - num_vlan = ntohs(tt_data->num_vlan); - flex_size = flex_array_size(tt_data, vlan_data, num_vlan); - if (tvlv_value_len < flex_size) + tt_data_sz = struct_size(tt_data, vlan_data, num_vlan); + if (tvlv_value_len < tt_data_sz) return; tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data - + flex_size); - tvlv_value_len -= flex_size; + + tt_data_sz); + tvlv_value_len -= tt_data_sz; num_entries = batadv_tt_entries(tvlv_value_len); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index f491bff8c51b..fe89f08533fe 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v { * neighbor */ unsigned long last_unicast_tx; - - /** @metric_work: work queue callback item for metric update */ - struct work_struct metric_work; }; /** diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 50cfec8ccac4..3c29778171c5 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -825,11 +825,16 @@ static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan, unsigned long hdr_len, unsigned long len, int nb) { + struct sk_buff *skb; + /* Note that we must allocate using GFP_ATOMIC here as * this function is called originally from netdev hard xmit * function in atomic context. */ - return bt_skb_alloc(hdr_len + len, GFP_ATOMIC); + skb = bt_skb_alloc(hdr_len + len, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + return skb; } static void chan_suspend_cb(struct l2cap_chan *chan) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e7ec12437c8b..012fc107901a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,6 +57,7 @@ DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); +DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); @@ -2972,7 +2973,9 @@ int hci_register_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_add_tail_rcu(&cb->list, &hci_cb_list); + mutex_lock(&hci_cb_list_lock); + list_add_tail(&cb->list, &hci_cb_list); + mutex_unlock(&hci_cb_list_lock); return 0; } @@ -2982,8 +2985,9 @@ int hci_unregister_cb(struct hci_cb *cb) { BT_DBG("%p name %s", cb, cb->name); - list_del_rcu(&cb->list); - synchronize_rcu(); + mutex_lock(&hci_cb_list_lock); + list_del(&cb->list); + mutex_unlock(&hci_cb_list_lock); return 0; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 2cc7a9306350..903b0b52692a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3391,23 +3391,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data, hci_update_scan(hdev); } - params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); - if (params) { - switch (params->auto_connect) { - case HCI_AUTO_CONN_LINK_LOSS: - if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + /* Re-enable passive scanning if disconnected device is marked + * as auto-connectable. + */ + if (conn->type == LE_LINK) { + params = hci_conn_params_lookup(hdev, &conn->dst, + conn->dst_type); + if (params) { + switch (params->auto_connect) { + case HCI_AUTO_CONN_LINK_LOSS: + if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) + break; + fallthrough; + + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + hci_pend_le_list_del_init(params); + hci_pend_le_list_add(params, + &hdev->pend_le_conns); + hci_update_passive_scan(hdev); break; - fallthrough; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - hci_pend_le_list_del_init(params); - hci_pend_le_list_add(params, &hdev->pend_le_conns); - hci_update_passive_scan(hdev); - break; - - default: - break; + default: + break; + } } } diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig index 6746be07e222..e08aae35351a 100644 --- a/net/bluetooth/hidp/Kconfig +++ b/net/bluetooth/hidp/Kconfig @@ -1,8 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config BT_HIDP tristate "HIDP protocol support" - depends on BT_BREDR && INPUT && HID_SUPPORT - select HID + depends on BT_BREDR && HID help HIDP (Human Interface Device Protocol) is a transport layer for HID reports. HIDP is required for the Bluetooth Human diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 44acddf58a0c..0cb52a3308ba 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2187,11 +2187,6 @@ done: return HCI_LM_ACCEPT; } -static bool iso_match(struct hci_conn *hcon) -{ - return hcon->type == ISO_LINK || hcon->type == LE_LINK; -} - static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) { if (hcon->type != ISO_LINK) { @@ -2373,7 +2368,6 @@ drop: static struct hci_cb iso_cb = { .name = "ISO", - .match = iso_match, .connect_cfm = iso_connect_cfm, .disconn_cfm = iso_disconn_cfm, }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 27b4c4a2ba1f..c27ea70f71e1 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -119,7 +119,6 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, { struct l2cap_chan *c; - mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ @@ -127,7 +126,6 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, if (c) l2cap_chan_lock(c); } - mutex_unlock(&conn->chan_lock); return c; } @@ -140,7 +138,6 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, { struct l2cap_chan *c; - mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_dcid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ @@ -148,7 +145,6 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, if (c) l2cap_chan_lock(c); } - mutex_unlock(&conn->chan_lock); return c; } @@ -418,7 +414,7 @@ static void l2cap_chan_timeout(struct work_struct *work) if (!conn) return; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. */ @@ -439,7 +435,7 @@ static void l2cap_chan_timeout(struct work_struct *work) l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } struct l2cap_chan *l2cap_chan_create(void) @@ -636,14 +632,15 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) hci_conn_hold(conn->hcon); - list_add(&chan->list, &conn->chan_l); + /* Append to the list since the order matters for ECRED */ + list_add_tail(&chan->list, &conn->chan_l); } void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); __l2cap_chan_add(conn, chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } void l2cap_chan_del(struct l2cap_chan *chan, int err) @@ -731,9 +728,9 @@ void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, if (!conn) return; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); __l2cap_chan_list(conn, func, data); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } EXPORT_SYMBOL_GPL(l2cap_chan_list); @@ -745,7 +742,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -754,7 +751,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) @@ -948,6 +945,16 @@ static u8 l2cap_get_ident(struct l2cap_conn *conn) return id; } +static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, + u8 flags) +{ + /* Check if the hcon still valid before attempting to send */ + if (hci_conn_valid(conn->hcon->hdev, conn->hcon)) + hci_send_acl(conn->hchan, skb, flags); + else + kfree_skb(skb); +} + static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) { @@ -970,7 +977,7 @@ static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON; skb->priority = HCI_PRIO_MAX; - hci_send_acl(conn->hchan, skb, flags); + l2cap_send_acl(conn, skb, flags); } static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) @@ -1497,8 +1504,6 @@ static void l2cap_conn_start(struct l2cap_conn *conn) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -1567,8 +1572,6 @@ static void l2cap_conn_start(struct l2cap_conn *conn) l2cap_chan_unlock(chan); } - - mutex_unlock(&conn->chan_lock); } static void l2cap_le_conn_ready(struct l2cap_conn *conn) @@ -1614,7 +1617,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) if (hcon->type == ACL_LINK) l2cap_request_info(conn); - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { @@ -1632,7 +1635,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); if (hcon->type == LE_LINK) l2cap_le_conn_ready(conn); @@ -1647,14 +1650,10 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry(chan, &conn->chan_l, list) { if (test_bit(FLAG_FORCE_RELIABLE, &chan->flags)) l2cap_chan_set_err(chan, err); } - - mutex_unlock(&conn->chan_lock); } static void l2cap_info_timeout(struct work_struct *work) @@ -1665,7 +1664,9 @@ static void l2cap_info_timeout(struct work_struct *work) conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; + mutex_lock(&conn->lock); l2cap_conn_start(conn); + mutex_unlock(&conn->lock); } /* @@ -1757,6 +1758,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + mutex_lock(&conn->lock); + kfree_skb(conn->rx_skb); skb_queue_purge(&conn->pending_rx); @@ -1775,8 +1778,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) /* Force the connection to be immediately dropped */ hcon->disc_timeout = 0; - mutex_lock(&conn->chan_lock); - /* Kill channels */ list_for_each_entry_safe(chan, l, &conn->chan_l, list) { l2cap_chan_hold(chan); @@ -1790,15 +1791,14 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_put(chan); } - mutex_unlock(&conn->chan_lock); - - hci_chan_del(conn->hchan); - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) cancel_delayed_work_sync(&conn->info_timer); - hcon->l2cap_data = NULL; + hci_chan_del(conn->hchan); conn->hchan = NULL; + + hcon->l2cap_data = NULL; + mutex_unlock(&conn->lock); l2cap_conn_put(conn); } @@ -2916,8 +2916,6 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry(chan, &conn->chan_l, list) { if (chan->chan_type != L2CAP_CHAN_RAW) continue; @@ -2932,8 +2930,6 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) if (chan->ops->recv(chan, nskb)) kfree_skb(nskb); } - - mutex_unlock(&conn->chan_lock); } /* ---- L2CAP signalling commands ---- */ @@ -3776,7 +3772,11 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) struct l2cap_ecred_conn_rsp *rsp_flex = container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr); - if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + /* Check if channel for outgoing connection or if it wasn't deferred + * since in those cases it must be skipped. + */ + if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags) || + !test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags)) return; /* Reset ident so only one response is sent */ @@ -3952,7 +3952,6 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); /* Check if the ACL is secure enough (if not SDP) */ @@ -4059,7 +4058,6 @@ response: } l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); } @@ -4098,27 +4096,19 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); - mutex_lock(&conn->chan_lock); - if (scid) { chan = __l2cap_get_chan_by_scid(conn, scid); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; } else { chan = __l2cap_get_chan_by_ident(conn, cmd->ident); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; } chan = l2cap_chan_hold_unless_zero(chan); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; err = 0; @@ -4156,9 +4146,6 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); l2cap_chan_put(chan); -unlock: - mutex_unlock(&conn->chan_lock); - return err; } @@ -4446,11 +4433,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan->ops->set_shutdown(chan); - l2cap_chan_unlock(chan); - mutex_lock(&conn->chan_lock); - l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); - mutex_unlock(&conn->chan_lock); chan->ops->close(chan); @@ -4487,11 +4470,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, return 0; } - l2cap_chan_unlock(chan); - mutex_lock(&conn->chan_lock); - l2cap_chan_lock(chan); l2cap_chan_del(chan, 0); - mutex_unlock(&conn->chan_lock); chan->ops->close(chan); @@ -4689,13 +4668,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", dcid, mtu, mps, credits, result); - mutex_lock(&conn->chan_lock); - chan = __l2cap_get_chan_by_ident(conn, cmd->ident); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; err = 0; @@ -4743,9 +4718,6 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); -unlock: - mutex_unlock(&conn->chan_lock); - return err; } @@ -4857,7 +4829,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); if (!smp_sufficient_security(conn->hcon, pchan->sec_level, @@ -4923,7 +4894,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, response_unlock: l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); if (result == L2CAP_CR_PEND) @@ -5057,7 +5027,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); if (!smp_sufficient_security(conn->hcon, pchan->sec_level, @@ -5132,7 +5101,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, unlock: l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); response: @@ -5169,8 +5137,6 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, BT_DBG("mtu %u mps %u credits %u result 0x%4.4x", mtu, mps, credits, result); - mutex_lock(&conn->chan_lock); - cmd_len -= sizeof(*rsp); list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { @@ -5256,8 +5222,6 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); - return err; } @@ -5370,8 +5334,6 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn, if (cmd_len < sizeof(*rej)) return -EPROTO; - mutex_lock(&conn->chan_lock); - chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) goto done; @@ -5386,7 +5348,6 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn, l2cap_chan_put(chan); done: - mutex_unlock(&conn->chan_lock); return 0; } @@ -6841,8 +6802,12 @@ static void process_pending_rx(struct work_struct *work) BT_DBG(""); + mutex_lock(&conn->lock); + while ((skb = skb_dequeue(&conn->pending_rx))) l2cap_recv_frame(conn, skb); + + mutex_unlock(&conn->lock); } static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) @@ -6881,7 +6846,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); - mutex_init(&conn->chan_lock); + mutex_init(&conn->lock); INIT_LIST_HEAD(&conn->chan_l); INIT_LIST_HEAD(&conn->users); @@ -7072,7 +7037,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, } } - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); l2cap_chan_lock(chan); if (cid && __l2cap_get_chan_by_dcid(conn, cid)) { @@ -7113,7 +7078,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, chan_unlock: l2cap_chan_unlock(chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); done: hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -7217,11 +7182,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, return NULL; } -static bool l2cap_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK || hcon->type == LE_LINK; -} - static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) { struct hci_dev *hdev = hcon->hdev; @@ -7229,6 +7189,9 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status) struct l2cap_chan *pchan; u8 dst_type; + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status); if (status) { @@ -7293,6 +7256,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon) static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { + if (hcon->type != ACL_LINK && hcon->type != LE_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); l2cap_conn_del(hcon, bt_to_errno(reason)); @@ -7325,7 +7291,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt); - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -7399,7 +7365,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } /* Append fragment into frame respecting the maximum len of rx_skb */ @@ -7466,19 +7432,45 @@ static void l2cap_recv_reset(struct l2cap_conn *conn) conn->rx_len = 0; } +struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *c) +{ + if (!c) + return NULL; + + BT_DBG("conn %p orig refcnt %u", c, kref_read(&c->ref)); + + if (!kref_get_unless_zero(&c->ref)) + return NULL; + + return c; +} + void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct l2cap_conn *conn; int len; + /* Lock hdev to access l2cap_data to avoid race with l2cap_conn_del */ + hci_dev_lock(hcon->hdev); + + conn = hcon->l2cap_data; + if (!conn) conn = l2cap_conn_add(hcon); - if (!conn) - goto drop; + conn = l2cap_conn_hold_unless_zero(conn); + + hci_dev_unlock(hcon->hdev); + + if (!conn) { + kfree_skb(skb); + return; + } BT_DBG("conn %p len %u flags 0x%x", conn, skb->len, flags); + mutex_lock(&conn->lock); + switch (flags) { case ACL_START: case ACL_START_NO_FLUSH: @@ -7503,7 +7495,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) if (len == skb->len) { /* Complete frame received */ l2cap_recv_frame(conn, skb); - return; + goto unlock; } BT_DBG("Start: total len %d, frag len %u", len, skb->len); @@ -7567,11 +7559,13 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) drop: kfree_skb(skb); +unlock: + mutex_unlock(&conn->lock); + l2cap_conn_put(conn); } static struct hci_cb l2cap_cb = { .name = "L2CAP", - .match = l2cap_match, .connect_cfm = l2cap_connect_cfm, .disconn_cfm = l2cap_disconn_cfm, .security_cfm = l2cap_security_cfm, diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 46ea0bee2259..acd11b268b98 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1326,9 +1326,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) /* prevent sk structure from being freed whilst unlocked */ sock_hold(sk); - chan = l2cap_pi(sk)->chan; /* prevent chan structure from being freed whilst unlocked */ - l2cap_chan_hold(chan); + chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan); + if (!chan) + goto shutdown_already; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); @@ -1358,22 +1359,20 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) release_sock(sk); l2cap_chan_lock(chan); - conn = chan->conn; - if (conn) - /* prevent conn structure from being freed */ - l2cap_conn_get(conn); + /* prevent conn structure from being freed */ + conn = l2cap_conn_hold_unless_zero(chan->conn); l2cap_chan_unlock(chan); if (conn) /* mutex lock must be taken before l2cap_chan_lock() */ - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); l2cap_chan_lock(chan); l2cap_chan_close(chan, 0); l2cap_chan_unlock(chan); if (conn) { - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); l2cap_conn_put(conn); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f53304cb09db..621c555f639b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9660,6 +9660,9 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) + eir_precalc_len(sizeof(conn->dev_class))); + if (!skb) + return; + ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, &conn->dst); ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type); @@ -10413,6 +10416,8 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0)); + if (!skb) + return; ev = skb_put(skb, sizeof(*ev)); bacpy(&ev->addr.bdaddr, bdaddr); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4c56ca5a216c..ad5177e3a69b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2134,11 +2134,6 @@ static int rfcomm_run(void *unused) return 0; } -static bool rfcomm_match(struct hci_conn *hcon) -{ - return hcon->type == ACL_LINK; -} - static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; @@ -2185,7 +2180,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) static struct hci_cb rfcomm_cb = { .name = "RFCOMM", - .match = rfcomm_match, .security_cfm = rfcomm_security_cfm }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index aa7bfe26cb40..5d1bc0d6aee0 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -107,6 +107,14 @@ static void sco_conn_put(struct sco_conn *conn) kref_put(&conn->ref, sco_conn_free); } +static struct sco_conn *sco_conn_hold(struct sco_conn *conn) +{ + BT_DBG("conn %p refcnt %u", conn, kref_read(&conn->ref)); + + kref_get(&conn->ref); + return conn; +} + static struct sco_conn *sco_conn_hold_unless_zero(struct sco_conn *conn) { if (!conn) @@ -1353,6 +1361,7 @@ static void sco_conn_ready(struct sco_conn *conn) bacpy(&sco_pi(sk)->src, &conn->hcon->src); bacpy(&sco_pi(sk)->dst, &conn->hcon->dst); + sco_conn_hold(conn); hci_conn_hold(conn->hcon); __sco_chan_add(conn, sk, parent); @@ -1398,27 +1407,30 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) return lm; } -static bool sco_match(struct hci_conn *hcon) -{ - return hcon->type == SCO_LINK || hcon->type == ESCO_LINK; -} - static void sco_connect_cfm(struct hci_conn *hcon, __u8 status) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status); if (!status) { struct sco_conn *conn; conn = sco_conn_add(hcon); - if (conn) + if (conn) { sco_conn_ready(conn); + sco_conn_put(conn); + } } else sco_conn_del(hcon, bt_to_errno(status)); } static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { + if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK) + return; + BT_DBG("hcon %p reason %d", hcon, reason); sco_conn_del(hcon, bt_to_errno(reason)); @@ -1444,7 +1456,6 @@ drop: static struct hci_cb sco_cb = { .name = "SCO", - .match = sco_match, .connect_cfm = sco_connect_cfm, .disconn_cfm = sco_disconn_cfm, }; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 8f6f7db48d4e..7cb192cbd65f 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -660,12 +660,9 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, void __user *data_in = u64_to_user_ptr(kattr->test.data_in); void *data; - if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) + if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom) return ERR_PTR(-EINVAL); - if (user_size > size) - return ERR_PTR(-EMSGSIZE); - size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) diff --git a/net/can/af_can.c b/net/can/af_can.c index 01f3fbb3b67d..65230e81fa08 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -287,8 +287,8 @@ int can_send(struct sk_buff *skb, int loop) netif_rx(newskb); /* update statistics */ - pkg_stats->tx_frames++; - pkg_stats->tx_frames_delta++; + atomic_long_inc(&pkg_stats->tx_frames); + atomic_long_inc(&pkg_stats->tx_frames_delta); return 0; @@ -647,8 +647,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) int matches; /* update statistics */ - pkg_stats->rx_frames++; - pkg_stats->rx_frames_delta++; + atomic_long_inc(&pkg_stats->rx_frames); + atomic_long_inc(&pkg_stats->rx_frames_delta); /* create non-zero unique skb identifier together with *skb */ while (!(can_skb_prv(skb)->skbcnt)) @@ -669,8 +669,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) consume_skb(skb); if (matches > 0) { - pkg_stats->matches++; - pkg_stats->matches_delta++; + atomic_long_inc(&pkg_stats->matches); + atomic_long_inc(&pkg_stats->matches_delta); } } diff --git a/net/can/af_can.h b/net/can/af_can.h index 7c2d9161e224..22f3352c77fe 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -66,9 +66,9 @@ struct receiver { struct can_pkg_stats { unsigned long jiffies_init; - unsigned long rx_frames; - unsigned long tx_frames; - unsigned long matches; + atomic_long_t rx_frames; + atomic_long_t tx_frames; + atomic_long_t matches; unsigned long total_rx_rate; unsigned long total_tx_rate; @@ -82,9 +82,9 @@ struct can_pkg_stats { unsigned long max_tx_rate; unsigned long max_rx_match_ratio; - unsigned long rx_frames_delta; - unsigned long tx_frames_delta; - unsigned long matches_delta; + atomic_long_t rx_frames_delta; + atomic_long_t tx_frames_delta; + atomic_long_t matches_delta; }; /* persistent statistics */ diff --git a/net/can/bcm.c b/net/can/bcm.c index 217049fa496e..526cb6cd901f 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1011,13 +1011,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - op->timer.function = bcm_tx_timeout_handler; + hrtimer_setup(&op->timer, bcm_tx_timeout_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* currently unused in tx_ops */ - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&op->thrtimer, hrtimer_dummy_timeout, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the tx_ops */ list_add(&op->list, &bo->tx_ops); @@ -1192,13 +1191,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->rx_ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - op->timer.function = bcm_rx_timeout_handler; - - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - op->thrtimer.function = bcm_rx_thr_handler; + hrtimer_setup(&op->timer, bcm_rx_timeout_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&op->thrtimer, bcm_rx_thr_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); diff --git a/net/can/isotp.c b/net/can/isotp.c index 16046931542a..442c343afe1f 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1634,12 +1634,10 @@ static int isotp_init(struct sock *sk) so->rx.buflen = ARRAY_SIZE(so->rx.sbuf); so->tx.buflen = ARRAY_SIZE(so->tx.sbuf); - hrtimer_init(&so->rxtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - so->rxtimer.function = isotp_rx_timer_handler; - hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - so->txtimer.function = isotp_tx_timer_handler; - hrtimer_init(&so->txfrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - so->txfrtimer.function = isotp_txfr_timer_handler; + hrtimer_setup(&so->rxtimer, isotp_rx_timer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&so->txtimer, isotp_tx_timer_handler, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&so->txfrtimer, isotp_txfr_timer_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); init_waitqueue_head(&so->wait); spin_lock_init(&so->rx_lock); diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c index 486687901602..39844f14eed8 100644 --- a/net/can/j1939/bus.c +++ b/net/can/j1939/bus.c @@ -158,8 +158,8 @@ struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name) ecu->addr = J1939_IDLE_ADDR; ecu->name = name; - hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - ecu->ac_timer.function = j1939_ecu_timer_handler; + hrtimer_setup(&ecu->ac_timer, j1939_ecu_timer_handler, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); INIT_LIST_HEAD(&ecu->list); j1939_priv_get(priv); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 305dd72c844c..17226b2341d0 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -1132,7 +1132,7 @@ static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, todo_size = size; - while (todo_size) { + do { struct j1939_sk_buff_cb *skcb; segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE, @@ -1177,7 +1177,7 @@ static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, todo_size -= segment_size; session->total_queued_size += segment_size; - } + } while (todo_size); switch (ret) { case 0: /* OK */ diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 95f7a7e65a73..fbf5c8001c9d 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -382,8 +382,9 @@ sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session, skb_queue_walk(&session->skb_queue, do_skb) { do_skcb = j1939_skb_to_cb(do_skb); - if (offset_start >= do_skcb->offset && - offset_start < (do_skcb->offset + do_skb->len)) { + if ((offset_start >= do_skcb->offset && + offset_start < (do_skcb->offset + do_skb->len)) || + (offset_start == 0 && do_skcb->offset == 0 && do_skb->len == 0)) { skb = do_skb; } } @@ -1510,12 +1511,8 @@ static struct j1939_session *j1939_session_new(struct j1939_priv *priv, skcb = j1939_skb_to_cb(skb); memcpy(&session->skcb, skcb, sizeof(session->skcb)); - hrtimer_init(&session->txtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - session->txtimer.function = j1939_tp_txtimer; - hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_SOFT); - session->rxtimer.function = j1939_tp_rxtimer; + hrtimer_setup(&session->txtimer, j1939_tp_txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + hrtimer_setup(&session->rxtimer, j1939_tp_rxtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n", __func__, session, skcb->addr.sa, skcb->addr.da); diff --git a/net/can/proc.c b/net/can/proc.c index bbce97825f13..25fdf060e30d 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -118,6 +118,13 @@ void can_stat_update(struct timer_list *t) struct can_pkg_stats *pkg_stats = net->can.pkg_stats; unsigned long j = jiffies; /* snapshot */ + long rx_frames = atomic_long_read(&pkg_stats->rx_frames); + long tx_frames = atomic_long_read(&pkg_stats->tx_frames); + long matches = atomic_long_read(&pkg_stats->matches); + long rx_frames_delta = atomic_long_read(&pkg_stats->rx_frames_delta); + long tx_frames_delta = atomic_long_read(&pkg_stats->tx_frames_delta); + long matches_delta = atomic_long_read(&pkg_stats->matches_delta); + /* restart counting in timer context on user request */ if (user_reset) can_init_stats(net); @@ -127,35 +134,33 @@ void can_stat_update(struct timer_list *t) can_init_stats(net); /* prevent overflow in calc_rate() */ - if (pkg_stats->rx_frames > (ULONG_MAX / HZ)) + if (rx_frames > (LONG_MAX / HZ)) can_init_stats(net); /* prevent overflow in calc_rate() */ - if (pkg_stats->tx_frames > (ULONG_MAX / HZ)) + if (tx_frames > (LONG_MAX / HZ)) can_init_stats(net); /* matches overflow - very improbable */ - if (pkg_stats->matches > (ULONG_MAX / 100)) + if (matches > (LONG_MAX / 100)) can_init_stats(net); /* calc total values */ - if (pkg_stats->rx_frames) - pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) / - pkg_stats->rx_frames; + if (rx_frames) + pkg_stats->total_rx_match_ratio = (matches * 100) / rx_frames; pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j, - pkg_stats->tx_frames); + tx_frames); pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j, - pkg_stats->rx_frames); + rx_frames); /* calc current values */ - if (pkg_stats->rx_frames_delta) + if (rx_frames_delta) pkg_stats->current_rx_match_ratio = - (pkg_stats->matches_delta * 100) / - pkg_stats->rx_frames_delta; + (matches_delta * 100) / rx_frames_delta; - pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta); - pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta); + pkg_stats->current_tx_rate = calc_rate(0, HZ, tx_frames_delta); + pkg_stats->current_rx_rate = calc_rate(0, HZ, rx_frames_delta); /* check / update maximum values */ if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate) @@ -168,9 +173,9 @@ void can_stat_update(struct timer_list *t) pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio; /* clear values for 'current rate' calculation */ - pkg_stats->tx_frames_delta = 0; - pkg_stats->rx_frames_delta = 0; - pkg_stats->matches_delta = 0; + atomic_long_set(&pkg_stats->tx_frames_delta, 0); + atomic_long_set(&pkg_stats->rx_frames_delta, 0); + atomic_long_set(&pkg_stats->matches_delta, 0); /* restart timer (one second) */ mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ)); @@ -214,9 +219,12 @@ static int can_stats_proc_show(struct seq_file *m, void *v) struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; seq_putc(m, '\n'); - seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames); - seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames); - seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches); + seq_printf(m, " %8ld transmitted frames (TXF)\n", + atomic_long_read(&pkg_stats->tx_frames)); + seq_printf(m, " %8ld received frames (RXF)\n", + atomic_long_read(&pkg_stats->rx_frames)); + seq_printf(m, " %8ld matched frames (RXMF)\n", + atomic_long_read(&pkg_stats->matches)); seq_putc(m, '\n'); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 7d41cde1bcca..2e538399757f 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -355,11 +355,6 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = { static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) { - const struct btf *btf_vmlinux; - const struct btf_type *t; - const char *tname; - u32 btf_id; - if (prog->aux->dst_prog) return false; @@ -374,13 +369,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) return true; case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: - btf_vmlinux = bpf_get_btf_vmlinux(); - if (IS_ERR_OR_NULL(btf_vmlinux)) - return false; - btf_id = prog->aux->attach_btf_id; - t = btf_type_by_id(btf_vmlinux, btf_id); - tname = btf_name_by_offset(btf_vmlinux, t->name_off); - return !!strncmp(tname, "bpf_sk_storage", + return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage", strlen("bpf_sk_storage")); default: return false; diff --git a/net/core/dev.c b/net/core/dev.c index c0021cbd28fc..901514e42d15 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1121,6 +1121,12 @@ out: return ret; } +static bool dev_addr_cmp(struct net_device *dev, unsigned short type, + const char *ha) +{ + return dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len); +} + /** * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace @@ -1129,7 +1135,7 @@ out: * * Search for an interface by MAC address. Returns NULL if the device * is not found or a pointer to the device. - * The caller must hold RCU or RTNL. + * The caller must hold RCU. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * @@ -1141,14 +1147,39 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, struct net_device *dev; for_each_netdev_rcu(net, dev) - if (dev->type == type && - !memcmp(dev->dev_addr, ha, dev->addr_len)) + if (dev_addr_cmp(dev, type, ha)) return dev; return NULL; } EXPORT_SYMBOL(dev_getbyhwaddr_rcu); +/** + * dev_getbyhwaddr() - find a device by its hardware address + * @net: the applicable net namespace + * @type: media type of device + * @ha: hardware address + * + * Similar to dev_getbyhwaddr_rcu(), but the owner needs to hold + * rtnl_lock. + * + * Context: rtnl_lock() must be held. + * Return: pointer to the net_device, or NULL if not found + */ +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, + const char *ha) +{ + struct net_device *dev; + + ASSERT_RTNL(); + for_each_netdev(net, dev) + if (dev_addr_cmp(dev, type, ha)) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_getbyhwaddr); + struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev, *ret = NULL; @@ -2070,20 +2101,55 @@ static void __move_netdevice_notifier_net(struct net *src_net, __register_netdevice_notifier_net(dst_net, nb, true); } +static void rtnl_net_dev_lock(struct net_device *dev) +{ + bool again; + + do { + struct net *net; + + again = false; + + /* netns might be being dismantled. */ + rcu_read_lock(); + net = dev_net_rcu(dev); + net_passive_inc(net); + rcu_read_unlock(); + + rtnl_net_lock(net); + +#ifdef CONFIG_NET_NS + /* dev might have been moved to another netns. */ + if (!net_eq(net, rcu_access_pointer(dev->nd_net.net))) { + rtnl_net_unlock(net); + net_passive_dec(net); + again = true; + } +#endif + } while (again); +} + +static void rtnl_net_dev_unlock(struct net_device *dev) +{ + struct net *net = dev_net(dev); + + rtnl_net_unlock(net); + net_passive_dec(net); +} + int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) { - struct net *net = dev_net(dev); int err; - rtnl_net_lock(net); - err = __register_netdevice_notifier_net(net, nb, false); + rtnl_net_dev_lock(dev); + err = __register_netdevice_notifier_net(dev_net(dev), nb, false); if (!err) { nn->nb = nb; list_add(&nn->list, &dev->net_notifier_list); } - rtnl_net_unlock(net); + rtnl_net_dev_unlock(dev); return err; } @@ -2093,13 +2159,12 @@ int unregister_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) { - struct net *net = dev_net(dev); int err; - rtnl_net_lock(net); + rtnl_net_dev_lock(dev); list_del(&nn->list); - err = __unregister_netdevice_notifier_net(net, nb); - rtnl_net_unlock(net); + err = __unregister_netdevice_notifier_net(dev_net(dev), nb); + rtnl_net_dev_unlock(dev); return err; } @@ -3807,6 +3872,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device { netdev_features_t features; + if (!skb_frags_readable(skb)) + goto out_kfree_skb; + features = netif_skb_features(skb); skb = validate_xmit_vlan(skb, features); if (unlikely(!skb)) @@ -4692,7 +4760,7 @@ use_local_napi: * we have to raise NET_RX_SOFTIRQ. */ if (!sd->in_net_rx_action) - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + raise_softirq_irqoff(NET_RX_SOFTIRQ); } #ifdef CONFIG_RPS @@ -6920,6 +6988,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) list_add_rcu(&napi->dev_list, higher); /* adds after higher */ } +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +static void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void netif_napi_add_weight_locked(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), @@ -6931,8 +7016,7 @@ void netif_napi_add_weight_locked(struct net_device *dev, INIT_LIST_HEAD(&napi->poll_list); INIT_HLIST_NODE(&napi->napi_hash_node); - hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); - napi->timer.function = napi_watchdog; + hrtimer_setup(&napi->timer, napi_watchdog, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); init_gro_hash(napi); napi->skb = NULL; INIT_LIST_HEAD(&napi->rx_list); @@ -11286,6 +11370,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; const struct net_device_core_stats __percpu *p; + /* + * IPv{4,6} and udp tunnels share common stat helpers and use + * different stat type (NETDEV_PCPU_STAT_TSTATS vs + * NETDEV_PCPU_STAT_DSTATS). Ensure the accounting is consistent. + */ + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_bytes) != + offsetof(struct pcpu_dstats, rx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_packets) != + offsetof(struct pcpu_dstats, rx_packets)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_bytes) != + offsetof(struct pcpu_dstats, tx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_packets) != + offsetof(struct pcpu_dstats, tx_packets)); + if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); ops->ndo_get_stats64(dev, storage); @@ -11866,11 +11964,9 @@ EXPORT_SYMBOL(unregister_netdevice_many); */ void unregister_netdev(struct net_device *dev) { - struct net *net = dev_net(dev); - - rtnl_net_lock(net); + rtnl_net_dev_lock(dev); unregister_netdevice(dev); - rtnl_net_unlock(net); + rtnl_net_dev_unlock(dev); } EXPORT_SYMBOL(unregister_netdev); diff --git a/net/core/devmem.c b/net/core/devmem.c index 3bba3f018df0..0e5a2c672efd 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -109,6 +109,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int rxq_idx; + int err; if (binding->list.next) list_del(&binding->list); @@ -120,7 +121,8 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) rxq_idx = get_netdev_rx_queue_index(rxq); - WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx)); + err = netdev_rx_queue_restart(binding->dev, rxq_idx); + WARN_ON(err && err != -ENETDOWN); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 6efd4cccc9dd..212f0a048cab 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1734,30 +1734,30 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - rc = genl_register_family(&net_drop_monitor_family); - if (rc) { - pr_err("Could not create drop monitor netlink family\n"); - return rc; + for_each_possible_cpu(cpu) { + net_dm_cpu_data_init(cpu); + net_dm_hw_cpu_data_init(cpu); } - WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); rc = register_netdevice_notifier(&dropmon_net_notifier); if (rc < 0) { pr_crit("Failed to register netdevice notifier\n"); + return rc; + } + + rc = genl_register_family(&net_drop_monitor_family); + if (rc) { + pr_err("Could not create drop monitor netlink family\n"); goto out_unreg; } + WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); rc = 0; - for_each_possible_cpu(cpu) { - net_dm_cpu_data_init(cpu); - net_dm_hw_cpu_data_init(cpu); - } - goto out; out_unreg: - genl_unregister_family(&net_drop_monitor_family); + WARN_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); out: return rc; } @@ -1766,19 +1766,18 @@ static void exit_net_drop_monitor(void) { int cpu; - BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); - /* * Because of the module_get/put we do in the trace state change path * we are guaranteed not to have any current users when we get here */ + BUG_ON(genl_unregister_family(&net_drop_monitor_family)); + + BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); for_each_possible_cpu(cpu) { net_dm_hw_cpu_data_fini(cpu); net_dm_cpu_data_fini(cpu); } - - BUG_ON(genl_unregister_family(&net_drop_monitor_family)); } module_init(init_net_drop_monitor); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e684ba3ebb38..94a7872ab231 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -37,8 +37,8 @@ static const struct fib_kuid_range fib_kuid_range_unset = { bool fib_rule_matchall(const struct fib_rule *rule) { - if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || - rule->flags) + if (READ_ONCE(rule->iifindex) || READ_ONCE(rule->oifindex) || + rule->mark || rule->tun_id || rule->flags) return false; if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) return false; @@ -261,12 +261,14 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { - int ret = 0; + int iifindex, oifindex, ret = 0; - if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) + iifindex = READ_ONCE(rule->iifindex); + if (iifindex && (iifindex != fl->flowi_iif)) goto out; - if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) + oifindex = READ_ONCE(rule->oifindex); + if (oifindex && (oifindex != fl->flowi_oif)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) @@ -1041,14 +1043,14 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->iifname[0]) { if (nla_put_string(skb, FRA_IIFNAME, rule->iifname)) goto nla_put_failure; - if (rule->iifindex == -1) + if (READ_ONCE(rule->iifindex) == -1) frh->flags |= FIB_RULE_IIF_DETACHED; } if (rule->oifname[0]) { if (nla_put_string(skb, FRA_OIFNAME, rule->oifname)) goto nla_put_failure; - if (rule->oifindex == -1) + if (READ_ONCE(rule->oifindex) == -1) frh->flags |= FIB_RULE_OIF_DETACHED; } @@ -1220,10 +1222,10 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == -1 && strcmp(dev->name, rule->iifname) == 0) - rule->iifindex = dev->ifindex; + WRITE_ONCE(rule->iifindex, dev->ifindex); if (rule->oifindex == -1 && strcmp(dev->name, rule->oifname) == 0) - rule->oifindex = dev->ifindex; + WRITE_ONCE(rule->oifindex, dev->ifindex); } } @@ -1233,9 +1235,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == dev->ifindex) - rule->iifindex = -1; + WRITE_ONCE(rule->iifindex, -1); if (rule->oifindex == dev->ifindex) - rule->oifindex = -1; + WRITE_ONCE(rule->oifindex, -1); } } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0e638a37aa09..9cd8de6bebb5 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -853,23 +853,30 @@ __skb_flow_dissect_ports(const struct sk_buff *skb, void *target_container, const void *data, int nhoff, u8 ip_proto, int hlen) { - enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX; - struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_ports_range *key_ports_range = NULL; + struct flow_dissector_key_ports *key_ports = NULL; + __be32 ports; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) - dissector_ports = FLOW_DISSECTOR_KEY_PORTS; - else if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE)) - dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE; + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE)) + key_ports_range = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); - if (dissector_ports == FLOW_DISSECTOR_KEY_MAX) + if (!key_ports && !key_ports_range) return; - key_ports = skb_flow_dissector_target(flow_dissector, - dissector_ports, - target_container); - key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, - data, hlen); + ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); + + if (key_ports) + key_ports->ports = ports; + + if (key_ports_range) + key_ports_range->tp.ports = ports; } static void @@ -924,6 +931,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector *flow_dissector, void *target_container) { + struct flow_dissector_key_ports_range *key_ports_range = NULL; struct flow_dissector_key_ports *key_ports = NULL; struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; @@ -968,20 +976,21 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); - else if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE)) - key_ports = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE, - target_container); - - if (key_ports) { key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE)) { + key_ports_range = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); + key_ports_range->tp.src = flow_keys->sport; + key_ports_range->tp.dst = flow_keys->dport; + } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL)) { @@ -1108,10 +1117,12 @@ bool __skb_flow_dissect(const struct net *net, FLOW_DISSECTOR_KEY_BASIC, target_container); + rcu_read_lock(); + if (skb) { if (!net) { if (skb->dev) - net = dev_net(skb->dev); + net = dev_net_rcu(skb->dev); else if (skb->sk) net = sock_net(skb->sk); } @@ -1122,7 +1133,6 @@ bool __skb_flow_dissect(const struct net *net, enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; struct bpf_prog_array *run_array; - rcu_read_lock(); run_array = rcu_dereference(init_net.bpf.run_array[type]); if (!run_array) run_array = rcu_dereference(net->bpf.run_array[type]); @@ -1150,17 +1160,17 @@ bool __skb_flow_dissect(const struct net *net, prog = READ_ONCE(run_array->items[0].prog); result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); - if (result == BPF_FLOW_DISSECTOR_CONTINUE) - goto dissect_continue; - __skb_flow_bpf_to_target(&flow_keys, flow_dissector, - target_container); - rcu_read_unlock(); - return result == BPF_OK; + if (result != BPF_FLOW_DISSECTOR_CONTINUE) { + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, + target_container); + rcu_read_unlock(); + return result == BPF_OK; + } } -dissect_continue: - rcu_read_unlock(); } + rcu_read_unlock(); + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); diff --git a/net/core/gro.c b/net/core/gro.c index d1f44084e978..0ad549b07e03 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -7,9 +7,6 @@ #define MAX_GRO_SKBS 8 -/* This should be increased if a protocol with a bigger head is added. */ -#define GRO_MAX_HEAD (MAX_HEADER + 128) - static DEFINE_SPINLOCK(offload_lock); /** @@ -656,6 +653,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->pkt_type = PACKET_HOST; skb->encapsulation = 0; + skb->ip_summed = CHECKSUM_NONE; skb_shinfo(skb)->gso_type = 0; skb_shinfo(skb)->gso_size = 0; if (unlikely(skb->slow_gro)) { diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 711cd3b4347a..4417a18b3e95 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -23,6 +23,8 @@ #include <net/ip6_fib.h> #include <net/rtnh.h> +#include "dev.h" + DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); @@ -325,13 +327,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; + + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; + goto drop; + } - if (!dst) + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; goto drop; + } lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || @@ -341,8 +353,11 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->output)) + if (likely(ops && ops->output)) { + dev_xmit_recursion_inc(); ret = ops->output(net, sk, skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) @@ -359,13 +374,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_output); int lwtunnel_xmit(struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; + + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; + goto drop; + } - if (!dst) + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; goto drop; + } lwtstate = dst->lwtstate; @@ -376,8 +401,11 @@ int lwtunnel_xmit(struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->xmit)) + if (likely(ops && ops->xmit)) { + dev_xmit_recursion_inc(); ret = ops->xmit(skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) @@ -394,13 +422,23 @@ EXPORT_SYMBOL_GPL(lwtunnel_xmit); int lwtunnel_input(struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; struct lwtunnel_state *lwtstate; - int ret = -EINVAL; + struct dst_entry *dst; + int ret; - if (!dst) + if (dev_xmit_recursion()) { + net_crit_ratelimited("%s(): recursion limit reached on datapath\n", + __func__); + ret = -ENETDOWN; goto drop; + } + + dst = skb_dst(skb); + if (!dst) { + ret = -EINVAL; + goto drop; + } lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || @@ -410,8 +448,11 @@ int lwtunnel_input(struct sk_buff *skb) ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); - if (likely(ops && ops->input)) + if (likely(ops && ops->input)) { + dev_xmit_recursion_inc(); ret = ops->input(skb); + dev_xmit_recursion_dec(); + } rcu_read_unlock(); if (ret == -EOPNOTSUPP) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 89656d180bc6..1a620f903c56 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2250,6 +2250,7 @@ static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_IFINDEX] = { .type = NLA_U32 }, [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, + [NDTPA_QUEUE_LENBYTES] = { .type = NLA_U32 }, [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, [NDTPA_APP_PROBES] = { .type = NLA_U32 }, [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, @@ -3447,10 +3448,12 @@ static const struct seq_operations neigh_stat_seq_ops = { static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid) { - struct net *net = dev_net(n->dev); struct sk_buff *skb; int err = -ENOBUFS; + struct net *net; + rcu_read_lock(); + net = dev_net_rcu(n->dev); skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; @@ -3463,9 +3466,11 @@ static void __neigh_notify(struct neighbour *n, int type, int flags, goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); - return; + goto out; errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); +out: + rcu_read_unlock(); } void neigh_app_ns(struct neighbour *n) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cb39a12b2f82..4303f2a49262 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -464,7 +464,7 @@ static void net_complete_free(void) } -static void net_free(struct net *net) +void net_passive_dec(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); @@ -482,7 +482,7 @@ void net_drop_ns(void *p) struct net *net = (struct net *)p; if (net) - net_free(net); + net_passive_dec(net); } struct net *copy_net_ns(unsigned long flags, @@ -523,7 +523,7 @@ put_userns: key_remove_domain(net->key_domain); #endif put_user_ns(user_ns); - net_free(net); + net_passive_dec(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); @@ -672,7 +672,7 @@ static void cleanup_net(struct work_struct *work) key_remove_domain(net->key_domain); #endif put_user_ns(net->user_ns); - net_free(net); + net_passive_dec(net); } cleanup_net_task = NULL; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 62b4041aae1a..0ab722d95a2d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -319,6 +319,7 @@ static int netpoll_owner_active(struct net_device *dev) static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; + netdev_tx_t ret = NET_XMIT_DROP; struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. */ @@ -327,11 +328,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) lockdep_assert_irqs_disabled(); dev = np->dev; + rcu_read_lock(); npinfo = rcu_dereference_bh(dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); - return NET_XMIT_DROP; + goto out; } /* don't get messages out of order, and no recursion */ @@ -370,7 +372,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } - return NETDEV_TX_OK; + ret = NETDEV_TX_OK; +out: + rcu_read_unlock(); + return ret; } netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1f4d4b5570ab..d1e559fce918 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3432,6 +3432,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, err = -ENODEV; rtnl_nets_unlock(&rtnl_nets); + rtnl_nets_destroy(&rtnl_nets); errout: return err; } diff --git a/net/core/scm.c b/net/core/scm.c index 4f6a14babe5a..733c0cbd393d 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -282,6 +282,16 @@ efault: } EXPORT_SYMBOL(put_cmsg); +int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data) +{ + /* Don't produce truncated CMSGs */ + if (!msg->msg_control || msg->msg_controllen < CMSG_LEN(len)) + return -ETOOSMALL; + + return put_cmsg(msg, level, type, len, data); +} + void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) { struct scm_timestamping64 tss; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a441613a1e6c..b1c81687e9d8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -69,6 +69,7 @@ #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> +#include <net/gro.h> #include <net/gso.h> #include <net/hotdata.h> #include <net/ip6_checksum.h> @@ -95,7 +96,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif -#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) +#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN) +#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \ + GRO_MAX_HEAD_PAD)) /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique @@ -220,67 +223,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) -#if PAGE_SIZE == SZ_4K - -#define NAPI_HAS_SMALL_PAGE_FRAG 1 -#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc) - -/* specialized page frag allocator using a single order 0 page - * and slicing it into 1K sized fragment. Constrained to systems - * with a very limited amount of 1K fragments fitting a single - * page - to avoid excessive truesize underestimation - */ - -struct page_frag_1k { - void *va; - u16 offset; - bool pfmemalloc; -}; - -static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp) -{ - struct page *page; - int offset; - - offset = nc->offset - SZ_1K; - if (likely(offset >= 0)) - goto use_frag; - - page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - if (!page) - return NULL; - - nc->va = page_address(page); - nc->pfmemalloc = page_is_pfmemalloc(page); - offset = PAGE_SIZE - SZ_1K; - page_ref_add(page, offset / SZ_1K); - -use_frag: - nc->offset = offset; - return nc->va + offset; -} -#else - -/* the small page is actually unused in this build; add dummy helpers - * to please the compiler and avoid later preprocessor's conditionals - */ -#define NAPI_HAS_SMALL_PAGE_FRAG 0 -#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false - -struct page_frag_1k { -}; - -static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) -{ - return NULL; -} - -#endif - struct napi_alloc_cache { local_lock_t bh_lock; struct page_frag_cache page; - struct page_frag_1k page_small; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; @@ -290,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; -/* Double check that napi_get_frags() allocates skbs with - * skb->head being backed by slab, not a page fragment. - * This is to make sure bug fixed in 3226b158e67c - * ("net: avoid 32 x truesize under-estimation for tiny skbs") - * does not accidentally come back. - */ -void napi_get_frags_check(struct napi_struct *napi) -{ - struct sk_buff *skb; - - local_bh_disable(); - skb = napi_get_frags(napi); - WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag); - napi_free_frags(napi); - local_bh_enable(); -} - void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -736,7 +664,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. */ - if (len <= SKB_WITH_OVERHEAD(1024) || + if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); @@ -813,10 +741,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. - * When the small frag allocator is available, prefer it over kmalloc - * for small fragments */ - if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || + if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, @@ -826,32 +752,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) goto skb_success; } + len = SKB_HEAD_ALIGN(len); + if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache); - if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { - /* we are artificially inflating the allocation size, but - * that is not as bad as it may look like, as: - * - 'len' less than GRO_MAX_HEAD makes little sense - * - On most systems, larger 'len' values lead to fragment - * size above 512 bytes - * - kmalloc would use the kmalloc-1k slab for such values - * - Builds with smaller GRO_MAX_HEAD will very likely do - * little networking, as that implies no WiFi and no - * tunnels support, and 32 bits arches. - */ - len = SZ_1K; - data = page_frag_alloc_1k(&nc->page_small, gfp_mask); - pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small); - } else { - len = SKB_HEAD_ALIGN(len); - - data = page_frag_alloc(&nc->page, len, gfp_mask); - pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); - } + data = page_frag_alloc(&nc->page, len, gfp_mask); + pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) @@ -6123,11 +6033,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->offload_fwd_mark = 0; skb->offload_l3_fwd_mark = 0; #endif + ipvs_reset(skb); if (!xnet) return; - ipvs_reset(skb); skb->mark = 0; skb_clear_tstamp(skb); } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 61f3f3d4e528..0ddc4c718833 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -549,6 +549,9 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, return num_sge; } +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + psock->ingress_bytes += len; +#endif copied = len; msg->sg.start = 0; msg->sg.size = copied; @@ -1144,6 +1147,10 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) if (!ret) sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED); + if (sk_is_tcp(sk)) { + psock->strp.cb.read_sock = tcp_bpf_strp_read_sock; + psock->copied_seq = tcp_sk(sk)->copied_seq; + } return ret; } diff --git a/net/core/sock.c b/net/core/sock.c index eae2ae70a2e0..6c0e87f97fa4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2246,6 +2246,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, get_net_track(net, &sk->ns_tracker, priority); sock_inuse_add(net, 1); } else { + net_passive_inc(net); __netns_tracker_alloc(net, &sk->ns_tracker, false, priority); } @@ -2270,6 +2271,7 @@ EXPORT_SYMBOL(sk_alloc); static void __sk_destruct(struct rcu_head *head) { struct sock *sk = container_of(head, struct sock, sk_rcu); + struct net *net = sock_net(sk); struct sk_filter *filter; if (sk->sk_destruct) @@ -2301,14 +2303,28 @@ static void __sk_destruct(struct rcu_head *head) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); - if (likely(sk->sk_net_refcnt)) - put_net_track(sock_net(sk), &sk->ns_tracker); - else - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - + if (likely(sk->sk_net_refcnt)) { + put_net_track(net, &sk->ns_tracker); + } else { + __netns_tracker_free(net, &sk->ns_tracker, false); + net_passive_dec(net); + } sk_prot_free(sk->sk_prot_creator, sk); } +void sk_net_refcnt_upgrade(struct sock *sk) +{ + struct net *net = sock_net(sk); + + WARN_ON_ONCE(sk->sk_net_refcnt); + __netns_tracker_free(net, &sk->ns_tracker, false); + net_passive_dec(net); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); +} +EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade); + void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); @@ -2405,6 +2421,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * is not properly dismantling its kernel sockets at netns * destroy time. */ + net_passive_inc(sock_net(newsk)); __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, false, priority); } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index f1b9b3958792..82a14f131d00 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -303,7 +303,10 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) write_lock_bh(&sk->sk_callback_lock); if (stream_parser && stream_verdict && !psock->saved_data_ready) { - ret = sk_psock_init_strp(sk, psock); + if (sk_is_tcp(sk)) + ret = sk_psock_init_strp(sk, psock); + else + ret = -EOPNOTSUPP; if (ret) { write_unlock_bh(&sk->sk_callback_lock); sk_psock_put(sk, psock); @@ -541,6 +544,9 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); if (sk_is_stream_unix(sk)) return (1 << sk->sk_state) & TCPF_ESTABLISHED; + if (sk_is_vsock(sk) && + (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) + return (1 << sk->sk_state) & TCPF_ESTABLISHED; return true; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index ad2741f1346a..c7769ee0d9c5 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -34,6 +34,7 @@ static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; +static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -587,7 +588,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .extra1 = &netdev_budget_usecs_min, }, { .procname = "fb_tunnels_only_for_init_net", diff --git a/net/devlink/core.c b/net/devlink/core.c index f49cd83f1955..7203c39532fc 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -117,7 +117,7 @@ static struct devlink_rel *devlink_rel_alloc(void) err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel, xa_limit_32b, &next, GFP_KERNEL); - if (err) { + if (err < 0) { kfree(rel); return ERR_PTR(err); } diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index f22051f33868..84096f6b0236 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -72,8 +72,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); - phydev = ethnl_req_get_phydev(&req_info, - tb[ETHTOOL_A_CABLE_TEST_HEADER], + phydev = ethnl_req_get_phydev(&req_info, tb, + ETHTOOL_A_CABLE_TEST_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; @@ -339,8 +339,8 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) goto out_dev_put; rtnl_lock(); - phydev = ethnl_req_get_phydev(&req_info, - tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER], + phydev = ethnl_req_get_phydev(&req_info, tb, + ETHTOOL_A_CABLE_TEST_TDR_HEADER, info->extack); if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 2bd77c94f9f1..b97374b508f6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -6,6 +6,7 @@ #include <linux/rtnetlink.h> #include <linux/ptp_clock_kernel.h> #include <linux/phy_link_topology.h> +#include <net/netdev_queues.h> #include "netlink.h" #include "common.h" @@ -462,6 +463,11 @@ const char ts_rx_filter_names[][ETH_GSTRING_LEN] = { }; static_assert(ARRAY_SIZE(ts_rx_filter_names) == __HWTSTAMP_FILTER_CNT); +const char ts_flags_names[][ETH_GSTRING_LEN] = { + [const_ilog2(HWTSTAMP_FLAG_BONDED_PHC_INDEX)] = "bonded-phc-index", +}; +static_assert(ARRAY_SIZE(ts_flags_names) == __HWTSTAMP_FLAG_CNT); + const char udp_tunnel_type_names[][ETH_GSTRING_LEN] = { [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN] = "vxlan", [ETHTOOL_UDP_TUNNEL_TYPE_GENEVE] = "geneve", @@ -766,6 +772,21 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } +void ethtool_ringparam_get_cfg(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kparam, + struct netlink_ext_ack *extack) +{ + memset(param, 0, sizeof(*param)); + memset(kparam, 0, sizeof(*kparam)); + + param->cmd = ETHTOOL_GRINGPARAM; + dev->ethtool_ops->get_ringparam(dev, param, kparam, extack); + + /* Driver gives us current state, we want to return current config */ + kparam->tcp_data_split = dev->cfg->hds_config; +} + static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) { memset(info, 0, sizeof(*info)); diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 850eadde4bfc..a1088c2441d0 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -13,6 +13,7 @@ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT #define __SOF_TIMESTAMPING_CNT (const_ilog2(SOF_TIMESTAMPING_LAST) + 1) +#define __HWTSTAMP_FLAG_CNT (const_ilog2(HWTSTAMP_FLAG_LAST) + 1) struct link_mode_info { int speed; @@ -38,6 +39,7 @@ extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; extern const char ts_tx_type_names[][ETH_GSTRING_LEN]; extern const char ts_rx_filter_names[][ETH_GSTRING_LEN]; +extern const char ts_flags_names[][ETH_GSTRING_LEN]; extern const char udp_tunnel_type_names[][ETH_GSTRING_LEN]; int __ethtool_get_link(struct net_device *dev); @@ -49,6 +51,12 @@ int ethtool_check_max_channel(struct net_device *dev, struct ethtool_channels channels, struct genl_info *info); int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context); + +void ethtool_ringparam_get_cfg(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kparam, + struct netlink_ext_ack *extack); + int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); int ethtool_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 34bee42e1247..1c3ba2247776 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,7 +993,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS && + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; @@ -2059,8 +2059,8 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { - struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; struct kernel_ethtool_ringparam kernel_ringparam; + struct ethtool_ringparam ringparam, max; int ret; if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) @@ -2069,7 +2069,7 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) return -EFAULT; - dev->ethtool_ops->get_ringparam(dev, &max, &kernel_ringparam, NULL); + ethtool_ringparam_get_cfg(dev, &max, &kernel_ringparam, NULL); /* ensure new ring parameters are within the maximums */ if (ringparam.rx_pending > max.rx_max_pending || diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index af19e1bed303..05a5f72c99fa 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -103,7 +103,7 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_LINKSTATE_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); if (IS_ERR(phydev)) { ret = PTR_ERR(phydev); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index b4c45207fa32..734849a57369 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -211,7 +211,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, } struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, - const struct nlattr *header, + struct nlattr **tb, unsigned int header, struct netlink_ext_ack *extack) { struct phy_device *phydev; @@ -225,8 +225,8 @@ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, return req_info->dev->phydev; phydev = phy_link_topo_get_phy(req_info->dev, req_info->phy_index); - if (!phydev) { - NL_SET_ERR_MSG_ATTR(extack, header, + if (!phydev && tb) { + NL_SET_ERR_MSG_ATTR(extack, tb[header], "no phy matching phyindex"); return ERR_PTR(-ENODEV); } diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ff69ca0715de..ec6ab5443a6f 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -275,7 +275,8 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) * ethnl_req_get_phydev() - Gets the phy_device targeted by this request, * if any. Must be called under rntl_lock(). * @req_info: The ethnl request to get the phy from. - * @header: The netlink header, used for error reporting. + * @tb: The netlink attributes array, for error reporting. + * @header: The netlink header index, used for error reporting. * @extack: The netlink extended ACK, for error reporting. * * The caller must hold RTNL, until it's done interacting with the returned @@ -289,7 +290,7 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) * is returned. */ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info, - const struct nlattr *header, + struct nlattr **tb, unsigned int header, struct netlink_ext_ack *extack); /** diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index ed8f690f6bac..e067cc234419 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -125,7 +125,7 @@ static int ethnl_phy_parse_request(struct ethnl_req_info *req_base, struct phy_req_info *req_info = PHY_REQINFO(req_base); struct phy_device *phydev; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PHY_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PHY_HEADER, extack); if (!phydev) return 0; diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c index d95d92f173a6..e1f7820a6158 100644 --- a/net/ethtool/plca.c +++ b/net/ethtool/plca.c @@ -62,7 +62,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { @@ -152,7 +152,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) bool mod = false; int ret; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) @@ -211,7 +211,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2819e2ba6be2..4f6b99eab2a6 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PSE_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PSE_HEADER, info->extack); if (IS_ERR(phydev)) return -ENODEV; @@ -261,7 +261,7 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER], + phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PSE_HEADER, info->extack); ret = ethnl_set_pse_validate(phydev, info); if (ret) diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 7839bfd1ac6a..aeedd5ec6b8c 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -215,17 +215,16 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, static int ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) { - struct kernel_ethtool_ringparam kernel_ringparam = {}; - struct ethtool_ringparam ringparam = {}; + struct kernel_ethtool_ringparam kernel_ringparam; struct net_device *dev = req_info->dev; + struct ethtool_ringparam ringparam; struct nlattr **tb = info->attrs; const struct nlattr *err_attr; bool mod = false; int ret; - dev->ethtool_ops->get_ringparam(dev, &ringparam, - &kernel_ringparam, info->extack); - kernel_ringparam.tcp_data_split = dev->cfg->hds_config; + ethtool_ringparam_get_cfg(dev, &ringparam, &kernel_ringparam, + info->extack); ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 7cb106b590ab..58df9ad02ce8 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, u32 total_size, indir_bytes; u8 *rss_config; + data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); if (!ctx) return -ENOENT; @@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base, if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) return -EOPNOTSUPP; - data->no_key_fields = !ops->rxfh_per_ctx_key; return rss_prepare_ctx(request, dev, data, info); } diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index 038a2558f052..3ca8eb2a3b31 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -138,7 +138,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_STATS_HEADER], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_STATS_HEADER, info->extack); if (IS_ERR(phydev)) return PTR_ERR(phydev); diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 818cf01f0911..f6a67109beda 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -75,6 +75,11 @@ static const struct strset_info info_template[] = { .count = __HWTSTAMP_FILTER_CNT, .strings = ts_rx_filter_names, }, + [ETH_SS_TS_FLAGS] = { + .per_dev = false, + .count = __HWTSTAMP_FLAG_CNT, + .strings = ts_flags_names, + }, [ETH_SS_UDP_TUNNEL_TYPES] = { .per_dev = false, .count = __ETHTOOL_UDP_TUNNEL_TYPE_CNT, @@ -304,7 +309,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, return 0; } - phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_HEADER_FLAGS], + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS, info->extack); /* phydev can be NULL, check for errors only */ diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index 9188e088fb2f..2be356bdfe87 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -54,7 +54,7 @@ static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, data->hwtst_config.tx_type = BIT(cfg.tx_type); data->hwtst_config.rx_filter = BIT(cfg.rx_filter); - data->hwtst_config.flags = BIT(cfg.flags); + data->hwtst_config.flags = cfg.flags; data->hwprov_desc.index = -1; hwprov = rtnl_dereference(dev->hwprov); @@ -91,10 +91,16 @@ static int tsconfig_reply_size(const struct ethnl_req_info *req_base, BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32); BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32); + BUILD_BUG_ON(__HWTSTAMP_FLAG_CNT > 32); - if (data->hwtst_config.flags) - /* _TSCONFIG_HWTSTAMP_FLAGS */ - len += nla_total_size(sizeof(u32)); + if (data->hwtst_config.flags) { + ret = ethnl_bitset32_size(&data->hwtst_config.flags, + NULL, __HWTSTAMP_FLAG_CNT, + ts_flags_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_HWTSTAMP_FLAGS */ + } if (data->hwtst_config.tx_type) { ret = ethnl_bitset32_size(&data->hwtst_config.tx_type, @@ -130,8 +136,10 @@ static int tsconfig_fill_reply(struct sk_buff *skb, int ret; if (data->hwtst_config.flags) { - ret = nla_put_u32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, - data->hwtst_config.flags); + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + &data->hwtst_config.flags, NULL, + __HWTSTAMP_FLAG_CNT, + ts_flags_names, compact); if (ret < 0) return ret; } @@ -180,7 +188,7 @@ const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1] = [ETHTOOL_A_TSCONFIG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER] = NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), - [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_U32 }, + [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_NESTED }, [ETHTOOL_A_TSCONFIG_RX_FILTERS] = { .type = NLA_NESTED }, [ETHTOOL_A_TSCONFIG_TX_TYPES] = { .type = NLA_NESTED }, }; @@ -296,6 +304,7 @@ static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, BUILD_BUG_ON(__HWTSTAMP_TX_CNT >= 32); BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT >= 32); + BUILD_BUG_ON(__HWTSTAMP_FLAG_CNT > 32); if (!netif_device_present(dev)) return -ENODEV; @@ -377,9 +386,13 @@ static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, } if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS]) { - ethnl_update_u32(&hwtst_config.flags, - tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], - &config_mod); + ret = ethnl_update_bitset32(&hwtst_config.flags, + __HWTSTAMP_FLAG_CNT, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], + ts_flags_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; } ret = net_hwtstamp_validate(&hwtst_config); diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 691be6c445b3..ad3866c5a902 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -290,7 +290,8 @@ static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, reply_data = ctx->reply_data; memset(reply_data, 0, sizeof(*reply_data)); reply_data->base.dev = dev; - memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info)); + reply_data->ts_info.cmd = ETHTOOL_GET_TS_INFO; + reply_data->ts_info.phc_index = -1; return ehdr; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index cb9a7ed8abd3..814300eee39d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -659,10 +659,12 @@ static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb */ void arp_xmit(struct sk_buff *skb) { + rcu_read_lock(); /* Send it off, maybe filter it using firewalling first. */ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, - dev_net(skb->dev), NULL, skb, NULL, skb->dev, + dev_net_rcu(skb->dev), NULL, skb, NULL, skb->dev, arp_xmit_finish); + rcu_read_unlock(); } EXPORT_SYMBOL(arp_xmit); @@ -1075,7 +1077,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, + dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c8b3cf5fba4c..55b8151759bc 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1371,10 +1371,11 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) __be32 addr = 0; unsigned char localnet_scope = RT_SCOPE_HOST; struct in_device *in_dev; - struct net *net = dev_net(dev); + struct net *net; int master_idx; rcu_read_lock(); + net = dev_net_rcu(dev); in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 094084b61bff..5482edb5aade 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -399,10 +399,10 @@ static void icmp_push_reply(struct sock *sk, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { - struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->dst.dev); + struct net *net = dev_net_rcu(rt->dst.dev); bool apply_ratelimit = false; + struct ipcm_cookie ipc; struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; @@ -608,12 +608,14 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, struct sock *sk; if (!rt) - goto out; + return; + + rcu_read_lock(); if (rt->dst.dev) - net = dev_net(rt->dst.dev); + net = dev_net_rcu(rt->dst.dev); else if (skb_in->dev) - net = dev_net(skb_in->dev); + net = dev_net_rcu(skb_in->dev); else goto out; @@ -785,7 +787,8 @@ out_unlock: icmp_xmit_unlock(sk); out_bh_enable: local_bh_enable(); -out:; +out: + rcu_read_unlock(); } EXPORT_SYMBOL(__icmp_send); @@ -834,7 +837,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) * avoid additional coding at protocol handlers. */ if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { - __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS); return; } @@ -868,7 +871,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) struct net *net; u32 info = 0; - net = dev_net(skb_dst(skb)->dev); + net = dev_net_rcu(skb_dst(skb)->dev); /* * Incomplete header ? @@ -979,7 +982,7 @@ out_err: static enum skb_drop_reason icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { - __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS); return SKB_DROP_REASON_PKT_TOO_SMALL; } @@ -1011,7 +1014,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) struct icmp_bxm icmp_param; struct net *net; - net = dev_net(skb_dst(skb)->dev); + net = dev_net_rcu(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; @@ -1040,9 +1043,9 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) { + struct net *net = dev_net_rcu(skb->dev); struct icmp_ext_hdr *ext_hdr, _ext_hdr; struct icmp_ext_echo_iio *iio, _iio; - struct net *net = dev_net(skb->dev); struct inet6_dev *in6_dev; struct in_device *in_dev; struct net_device *dev; @@ -1181,7 +1184,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb) return SKB_NOT_DROPPED_YET; out_err: - __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS); return SKB_DROP_REASON_PKT_TOO_SMALL; } @@ -1198,7 +1201,7 @@ int icmp_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->dst.dev); + struct net *net = dev_net_rcu(rt->dst.dev); struct icmphdr *icmph; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { @@ -1371,9 +1374,9 @@ int icmp_err(struct sk_buff *skb, u32 info) struct iphdr *iph = (struct iphdr *)skb->data; int offset = iph->ihl<<2; struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); + struct net *net = dev_net_rcu(skb->dev); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; - struct net *net = dev_net(skb->dev); /* * Use ping_err to handle all icmp errors except those diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 577b88a43293..753704f75b2c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -390,7 +390,13 @@ static inline int ip_rt_proc_init(void) static inline bool rt_is_expired(const struct rtable *rth) { - return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); + bool res; + + rcu_read_lock(); + res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev)); + rcu_read_unlock(); + + return res; } void rt_cache_flush(struct net *net) @@ -1002,9 +1008,9 @@ out: kfree_skb_reason(skb, reason); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; - struct net *net = dev_net(dst->dev); struct fib_result res; bool lock = false; + struct net *net; u32 old_mtu; if (ip_mtu_locked(dst)) @@ -1014,6 +1020,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (old_mtu < mtu) return; + rcu_read_lock(); + net = dev_net_rcu(dst->dev); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); @@ -1021,9 +1029,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) - return; + goto out; - rcu_read_lock(); if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc; @@ -1037,14 +1044,14 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } - rcu_read_unlock(); - return; + goto out; } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } +out: rcu_read_unlock(); } @@ -1307,10 +1314,15 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { - struct net *net = dev_net(dst->dev); unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); - unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, - net->ipv4.ip_rt_min_advmss); + unsigned int advmss; + struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); + advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, + net->ipv4.ip_rt_min_advmss); + rcu_read_unlock(); return min(advmss, IPV4_MAX_PMTU - header_size); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d704bda6c41..57df7c1d2faa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1565,12 +1565,13 @@ EXPORT_SYMBOL(tcp_recv_skb); * or for 'peeking' the socket using this routine * (although both would be easy to implement). */ -int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int __tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq) { struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); - u32 seq = tp->copied_seq; + u32 seq = *copied_seq; u32 offset; int copied = 0; @@ -1624,9 +1625,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_eat_recv_skb(sk, skb); if (!desc->count) break; - WRITE_ONCE(tp->copied_seq, seq); + WRITE_ONCE(*copied_seq, seq); } - WRITE_ONCE(tp->copied_seq, seq); + WRITE_ONCE(*copied_seq, seq); + + if (noack) + goto out; tcp_rcv_space_adjust(sk); @@ -1635,10 +1639,25 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); } +out: return copied; } + +int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + return __tcp_read_sock(sk, desc, recv_actor, false, + &tcp_sk(sk)->copied_seq); +} EXPORT_SYMBOL(tcp_read_sock); +int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq) +{ + return __tcp_read_sock(sk, desc, recv_actor, noack, copied_seq); +} + int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct sk_buff *skb; @@ -2438,14 +2457,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, */ memset(&dmabuf_cmsg, 0, sizeof(dmabuf_cmsg)); dmabuf_cmsg.frag_size = copy; - err = put_cmsg(msg, SOL_SOCKET, SO_DEVMEM_LINEAR, - sizeof(dmabuf_cmsg), &dmabuf_cmsg); - if (err || msg->msg_flags & MSG_CTRUNC) { - msg->msg_flags &= ~MSG_CTRUNC; - if (!err) - err = -ETOOSMALL; + err = put_cmsg_notrunc(msg, SOL_SOCKET, + SO_DEVMEM_LINEAR, + sizeof(dmabuf_cmsg), + &dmabuf_cmsg); + if (err) goto out; - } sent += copy; @@ -2499,16 +2516,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, offset += copy; remaining_len -= copy; - err = put_cmsg(msg, SOL_SOCKET, - SO_DEVMEM_DMABUF, - sizeof(dmabuf_cmsg), - &dmabuf_cmsg); - if (err || msg->msg_flags & MSG_CTRUNC) { - msg->msg_flags &= ~MSG_CTRUNC; - if (!err) - err = -ETOOSMALL; + err = put_cmsg_notrunc(msg, SOL_SOCKET, + SO_DEVMEM_DMABUF, + sizeof(dmabuf_cmsg), + &dmabuf_cmsg); + if (err) goto out; - } atomic_long_inc(&niov->pp_ref_count); tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 47f65b1b70ca..ba581785adb4 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -646,6 +646,42 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendmsg == tcp_sendmsg ? 0 : -ENOTSUPP; } +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) +int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + struct sock *sk = strp->sk; + struct sk_psock *psock; + struct tcp_sock *tp; + int copied = 0; + + tp = tcp_sk(sk); + rcu_read_lock(); + psock = sk_psock(sk); + if (WARN_ON_ONCE(!psock)) { + desc->error = -EINVAL; + goto out; + } + + psock->ingress_bytes = 0; + copied = tcp_read_sock_noack(sk, desc, recv_actor, true, + &psock->copied_seq); + if (copied < 0) + goto out; + /* recv_actor may redirect skb to another socket (SK_REDIRECT) or + * just put skb into ingress queue of current socket (SK_PASS). + * For SK_REDIRECT, we need to ack the frame immediately but for + * SK_PASS, we want to delay the ack until tcp_bpf_recvmsg_parser(). + */ + tp->copied_seq = psock->copied_seq - psock->ingress_bytes; + tcp_rcv_space_adjust(sk); + __tcp_cleanup_rbuf(sk, copied - psock->ingress_bytes); +out: + rcu_read_unlock(); + return copied; +} +#endif /* CONFIG_BPF_STREAM_PARSER */ + int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 0f523cbfe329..32b28fc21b63 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -178,7 +178,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) if (!skb) return; - skb_dst_drop(skb); + tcp_cleanup_skb(skb); /* segs_in has been initialized to 1 in tcp_create_openreq_child(). * Hence, reset segs_in to 0 before calling tcp_segs_in() * to avoid double counting. Also, tcp_segs_in() expects @@ -195,7 +195,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); tp->syn_data_acked = 1; /* u64_stats_update_begin(&tp->syncp) not needed here, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb82e01da911..0cbf81bf3d45 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -243,9 +243,15 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; - if (old_ratio != tcp_sk(sk)->scaling_ratio) - WRITE_ONCE(tcp_sk(sk)->window_clamp, - tcp_win_from_space(sk, sk->sk_rcvbuf)); + if (old_ratio != tcp_sk(sk)->scaling_ratio) { + struct tcp_sock *tp = tcp_sk(sk); + + val = tcp_win_from_space(sk, sk->sk_rcvbuf); + tcp_set_window_clamp(sk, val); + + if (tp->window_clamp < tp->rcvq_space.space) + tp->rcvq_space.space = tp->window_clamp; + } } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); @@ -4970,7 +4976,7 @@ static void tcp_ofo_queue(struct sock *sk) tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); else kfree_skb_partial(skb, fragstolen); @@ -5162,7 +5168,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, skb, fragstolen)) ? 1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); skb_set_owner_r(skb, sk); } return eaten; @@ -5245,7 +5251,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); return; } - skb_dst_drop(skb); + tcp_cleanup_skb(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); reason = SKB_DROP_REASON_NOT_SPECIFIED; @@ -6226,7 +6232,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ - skb_dst_drop(skb); + tcp_cleanup_skb(skb); __skb_pull(skb, tcp_header_len); eaten = tcp_queue_rcv(sk, skb, &fragstolen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cc2b5194a18d..2632844d2c35 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2027,7 +2027,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, */ skb_condense(skb); - skb_dst_drop(skb); + tcp_cleanup_skb(skb); if (unlikely(tcp_checksum_complete(skb))) { bh_unlock_sock(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b089b08e9617..dfdb7a4608a8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -815,12 +815,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* In sequence, PAWS is OK. */ - /* TODO: We probably should defer ts_recent change once - * we take ownership of @req. - */ - if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) - WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval); - if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { /* Truncate SYN, it is out of window starting at tcp_rsk(req)->rcv_isn + 1. */ @@ -869,6 +863,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; + if (own_req && tmp_opt.saw_tstamp && + !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) + tcp_sk(child)->rx_opt.ts_recent = tmp_opt.rcv_tsval; + if (own_req && rsk_drop_req(req)) { reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req); inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 2308665b51c5..2dfac79dc78b 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -13,12 +13,15 @@ #include <net/tcp.h> #include <net/protocol.h> -static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, +static void tcp_gso_tstamp(struct sk_buff *skb, struct sk_buff *gso_skb, unsigned int seq, unsigned int mss) { + u32 flags = skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP; + u32 ts_seq = skb_shinfo(gso_skb)->tskey; + while (skb) { if (before(ts_seq, seq + mss)) { - skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP; + skb_shinfo(skb)->tx_flags |= flags; skb_shinfo(skb)->tskey = ts_seq; return; } @@ -193,8 +196,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, th = tcp_hdr(skb); seq = ntohl(th->seq); - if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) - tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); + if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP)) + tcp_gso_tstamp(segs, gso_skb, seq, mss); newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta)); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b412ed88ccd9..e7a75afa995d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -884,11 +884,9 @@ void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); - hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_PINNED_SOFT); - tcp_sk(sk)->pacing_timer.function = tcp_pace_kick; + hrtimer_setup(&tcp_sk(sk)->pacing_timer, tcp_pace_kick, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED_SOFT); - hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_PINNED_SOFT); - tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick; + hrtimer_setup(&tcp_sk(sk)->compressed_ack_timer, tcp_compressed_ack_kick, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED_SOFT); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c472c9a57cf6..a9bb9ce5438e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,9 +1141,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index a5be6e4ed326..ecfca59f31f1 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -321,13 +321,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, /* clear destructor to avoid skb_segment assigning it to tail */ copy_dtor = gso_skb->destructor == sock_wfree; - if (copy_dtor) + if (copy_dtor) { gso_skb->destructor = NULL; + gso_skb->sk = NULL; + } segs = skb_segment(gso_skb, features); if (IS_ERR_OR_NULL(segs)) { - if (copy_dtor) + if (copy_dtor) { gso_skb->destructor = sock_wfree; + gso_skb->sk = sk; + } return segs; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a6984a29fdb9..4d14ab7f7e99 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -76,7 +76,7 @@ static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); @@ -473,7 +473,10 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (!skb->dev) return; - net = dev_net(skb->dev); + + rcu_read_lock(); + + net = dev_net_rcu(skb->dev); mark = IP6_REPLY_MARK(net, skb->mark); /* * Make sure we respect the rules @@ -496,7 +499,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && (opt_unrec(skb, info)))) - return; + goto out; saddr = NULL; } @@ -526,7 +529,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); - return; + goto out; } /* @@ -535,7 +538,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (is_ineligible(skb)) { net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); - return; + goto out; } /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ @@ -582,7 +585,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit)) - goto out; + goto out_unlock; tmp_hdr.icmp6_type = type; tmp_hdr.icmp6_code = code; @@ -600,7 +603,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) - goto out; + goto out_unlock; ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); @@ -616,7 +619,6 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, goto out_dst_release; } - rcu_read_lock(); idev = __in6_dev_get(skb->dev); if (ip6_append_data(sk, icmpv6_getfrag, &msg, @@ -630,13 +632,15 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); } - rcu_read_unlock(); + out_dst_release: dst_release(dst); -out: +out_unlock: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); +out: + rcu_read_unlock(); } EXPORT_SYMBOL(icmp6_send); @@ -679,8 +683,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, skb_pull(skb2, nhs); skb_reset_network_header(skb2); - rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, - skb, 0); + rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr, + NULL, 0, skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; @@ -717,7 +721,7 @@ EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach); static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; @@ -832,7 +836,7 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { struct inet6_skb_parm *opt = IP6CB(skb); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); const struct inet6_protocol *ipprot; enum skb_drop_reason reason; int inner_offset; @@ -889,7 +893,7 @@ out: static int icmpv6_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; @@ -921,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -939,7 +943,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type); + ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: @@ -1034,9 +1038,9 @@ static int icmpv6_rcv(struct sk_buff *skb) csum_error: reason = SKB_DROP_REASON_ICMP_CSUM; - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb_reason(skb, reason); return 0; diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index ff7e734e335b..7d574f5132e2 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -88,13 +88,15 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - if (ilwt->connected) { + /* cache only if we don't create a dst reference loop */ + if (ilwt->connected && orig_dst->lwtstate != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr); local_bh_enable(); } } + skb_dst_drop(skb); skb_dst_set(skb, dst); return dst_output(net, sk, skb); diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 28e5a89dc255..09065187378e 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -336,8 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst; - struct in6_addr orig_daddr; + struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; struct ioam6_lwt *ilwt; int err = -EINVAL; u32 pkt_cnt; @@ -352,8 +351,6 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k) goto out; - orig_daddr = ipv6_hdr(skb)->daddr; - local_bh_disable(); cache_dst = dst_cache_get(&ilwt->cache); local_bh_enable(); @@ -407,27 +404,34 @@ do_encap: cache_dst = ip6_route_output(net, NULL, &fl6); if (cache_dst->error) { err = cache_dst->error; - dst_release(cache_dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (dst->lwtstate != cache_dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); if (unlikely(err)) goto drop; } - if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { + /* avoid lwtunnel_output() reentry loop when destination is the same + * after transformation (e.g., with the inline mode) + */ + if (dst->lwtstate != cache_dst->lwtstate) { skb_dst_drop(skb); skb_dst_set(skb, cache_dst); return dst_output(net, sk, skb); } out: + dst_release(cache_dst); return dst->lwtstate->orig_output(net, sk, skb); drop: + dst_release(cache_dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 70c0e16c0ae6..39da6a7ce5f1 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -477,9 +477,7 @@ discard: static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_clear_delivery_time(skb); - rcu_read_lock(); ip6_protocol_deliver_rcu(net, skb, 0, false); - rcu_read_unlock(); return 0; } @@ -487,9 +485,15 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, - ip6_input_finish); + int res; + + rcu_read_lock(); + res = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net_rcu(skb->dev), NULL, skb, skb->dev, NULL, + ip6_input_finish); + rcu_read_unlock(); + + return res; } EXPORT_SYMBOL_GPL(ip6_input); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9dfdb40988b0..65831b4fee1f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1773,21 +1773,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) struct net_device *dev = idev->dev; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct net *net = dev_net(dev); const struct in6_addr *saddr; struct in6_addr addr_buf; struct mld2_report *pmr; struct sk_buff *skb; unsigned int size; struct sock *sk; - int err; + struct net *net; - sk = net->ipv6.igmp_sk; /* we assume size > sizeof(ra) here * Also try to not allocate high-order pages for big MTU */ size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen; - skb = sock_alloc_send_skb(sk, size, 1, &err); + skb = alloc_skb(size, GFP_KERNEL); if (!skb) return NULL; @@ -1795,6 +1793,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) skb_reserve(skb, hlen); skb_tailroom_reserve(skb, mtu, tlen); + rcu_read_lock(); + + net = dev_net_rcu(dev); + sk = net->ipv6.igmp_sk; + skb_set_owner_w(skb, sk); + if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: * use unspecified address as the source address @@ -1806,6 +1810,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); + rcu_read_unlock(); + skb_put_data(skb, ra, sizeof(ra)); skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); @@ -2165,21 +2171,21 @@ static void mld_send_cr(struct inet6_dev *idev) static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.igmp_sk; + const struct in6_addr *snd_addr, *saddr; + int err, len, payload_len, full_len; + struct in6_addr addr_buf; struct inet6_dev *idev; struct sk_buff *skb; struct mld_msg *hdr; - const struct in6_addr *snd_addr, *saddr; - struct in6_addr addr_buf; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - struct flowi6 fl6; struct dst_entry *dst; + struct flowi6 fl6; + struct net *net; + struct sock *sk; if (type == ICMPV6_MGM_REDUCTION) snd_addr = &in6addr_linklocal_allrouters; @@ -2190,19 +2196,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); - rcu_read_unlock(); + skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL); - skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); + rcu_read_lock(); + net = dev_net_rcu(dev); + idev = __in6_dev_get(dev); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); if (!skb) { - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; } + sk = net->ipv6.igmp_sk; + skb_set_owner_w(skb, sk); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, hlen); @@ -2227,9 +2235,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d044c67019de..8699d1a188dc 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -418,15 +418,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); - if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", - __func__); + if (!skb) return NULL; - } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -437,7 +433,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ - skb_set_owner_w(skb, sk); + rcu_read_lock(); + skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk); + rcu_read_unlock(); return skb; } @@ -473,16 +471,20 @@ static void ip6_nd_hdr(struct sk_buff *skb, void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct icmp6hdr *icmp6h = icmp6_hdr(skb); struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(skb->dev); - struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; + struct net *net; + struct sock *sk; int err; - struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; + rcu_read_lock(); + + net = dev_net_rcu(skb->dev); + sk = net->ipv6.ndisc_sk; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; @@ -490,6 +492,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { + rcu_read_unlock(); kfree_skb(skb); return; } @@ -504,7 +507,6 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); - rcu_read_lock(); idev = __in6_dev_get(dst->dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); @@ -1694,7 +1696,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) bool ret; if (netif_is_l3_master(skb->dev)) { - dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); + dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) return; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 78362822b907..15ce21afc8c6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3196,13 +3196,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; unsigned int mtu = dst_mtu(dst); - struct net *net = dev_net(dev); + struct net *net; mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + rcu_read_lock(); + + net = dev_net_rcu(dev); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) mtu = net->ipv6.sysctl.ip6_rt_min_advmss; + rcu_read_unlock(); + /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. @@ -3639,7 +3644,8 @@ out: in6_dev_put(idev); if (err) { - lwtstate_put(fib6_nh->fib_nh_lws); + fib_nh_common_release(&fib6_nh->nh_common); + fib6_nh->nh_common.nhc_pcpu_rth_output = NULL; fib6_nh->fib_nh_lws = NULL; netdev_put(dev, dev_tracker); } @@ -3797,10 +3803,12 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (nh) { if (rt->fib6_src.plen) { NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + err = -EINVAL; goto out_free; } if (!nexthop_get(nh)) { NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + err = -ENOENT; goto out_free; } rt->nh = nh; diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7ba22d2f2bfe..7c05ac846646 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -232,13 +232,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -251,6 +253,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } @@ -259,23 +262,35 @@ static int rpl_input(struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; + struct lwtunnel_state *lwtst; struct rpl_lwt *rlwt; int err; - rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); + /* We cannot dereference "orig_dst" once ip6_route_input() or + * skb_dst_drop() is called. However, in order to detect a dst loop, we + * need the address of its lwtstate. So, save the address of lwtstate + * now and use it later as a comparison. + */ + lwtst = orig_dst->lwtstate; + + rlwt = rpl_lwt_lwtunnel(lwtst); local_bh_disable(); dst = dst_cache_get(&rlwt->cache); local_bh_enable(); err = rpl_do_srh(skb, rlwt, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); - if (!dst->error) { + + /* cache only if we don't create a dst reference loop */ + if (!dst->error && lwtst != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&rlwt->cache, dst, &ipv6_hdr(skb)->saddr); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 4bf937bfc263..51583461ae29 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -472,23 +472,35 @@ static int seg6_input_core(struct net *net, struct sock *sk, { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; + struct lwtunnel_state *lwtst; struct seg6_lwt *slwt; int err; - slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + /* We cannot dereference "orig_dst" once ip6_route_input() or + * skb_dst_drop() is called. However, in order to detect a dst loop, we + * need the address of its lwtstate. So, save the address of lwtstate + * now and use it later as a comparison. + */ + lwtst = orig_dst->lwtstate; + + slwt = seg6_lwt_lwtunnel(lwtst); local_bh_disable(); dst = dst_cache_get(&slwt->cache); local_bh_enable(); err = seg6_do_srh(skb, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); - if (!dst->error) { + + /* cache only if we don't create a dst reference loop */ + if (!dst->error && lwtst != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&slwt->cache, dst, &ipv6_hdr(skb)->saddr); @@ -571,13 +583,15 @@ static int seg6_output_core(struct net *net, struct sock *sk, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -593,6 +607,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index a45bf17cb2a1..ae2da28f9dfb 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -94,14 +94,23 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) } static void __tcpv6_gso_segment_csum(struct sk_buff *seg, + struct in6_addr *oldip, + const struct in6_addr *newip, __be16 *oldport, __be16 newport) { - struct tcphdr *th; + struct tcphdr *th = tcp_hdr(seg); + + if (!ipv6_addr_equal(oldip, newip)) { + inet_proto_csum_replace16(&th->check, seg, + oldip->s6_addr32, + newip->s6_addr32, + true); + *oldip = *newip; + } if (*oldport == newport) return; - th = tcp_hdr(seg); inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); *oldport = newport; } @@ -129,10 +138,10 @@ static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs) th2 = tcp_hdr(seg); iph2 = ipv6_hdr(seg); - iph2->saddr = iph->saddr; - iph2->daddr = iph->daddr; - __tcpv6_gso_segment_csum(seg, &th2->source, th->source); - __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest); + __tcpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr, + &th2->source, th->source); + __tcpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr, + &th2->dest, th->dest); } return segs; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6671daa67f4f..c6ea438b5c75 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1389,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 06fb8e6944b0..7a0cae9a8111 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -24,7 +24,7 @@ #include <net/llc_s_ac.h> #include <net/llc_s_ev.h> #include <net/llc_sap.h> - +#include <net/sock.h> /** * llc_sap_action_unitdata_ind - forward UI PDU to network layer @@ -40,6 +40,26 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) return 0; } +static int llc_prepare_and_xmit(struct sk_buff *skb) +{ + struct llc_sap_state_ev *ev = llc_sap_ev(skb); + struct sk_buff *nskb; + int rc; + + rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); + if (rc) + return rc; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; + + if (skb->sk) + skb_set_owner_w(nskb, skb->sk); + + return dev_queue_xmit(nskb); +} + /** * llc_sap_action_send_ui - sends UI PDU resp to UNITDATA REQ to MAC layer * @sap: SAP @@ -52,17 +72,12 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } /** @@ -77,17 +92,12 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } /** @@ -133,17 +143,12 @@ out: int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); - rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) { - skb_get(skb); - rc = dev_queue_xmit(skb); - } - return rc; + + return llc_prepare_and_xmit(skb); } int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 299d38e9e863..35349a7f16cb 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -116,8 +116,14 @@ void drv_remove_interface(struct ieee80211_local *local, sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; - /* Remove driver debugfs entries */ - ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); + /* + * Remove driver debugfs entries. + * The virtual monitor interface doesn't get a debugfs + * entry, so it's exempt here. + */ + if (sdata != rcu_access_pointer(local->monitor_sdata)) + ieee80211_debugfs_recreate_netdev(sdata, + sdata->vif.valid_links); trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 7a3116c36df9..fd41046e3b68 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -2,7 +2,7 @@ /* * EHT handling * - * Copyright(c) 2021-2024 Intel Corporation + * Copyright(c) 2021-2025 Intel Corporation */ #include "ieee80211_i.h" @@ -76,6 +76,13 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + /* + * The MPDU length bits are reserved on all but 2.4 GHz and get set via + * VHT (5 GHz) or HE (6 GHz) capabilities. + */ + if (sband->band != NL80211_BAND_2GHZ) + return; + switch (u8_get_bits(eht_cap->eht_cap_elem.mac_cap_info[0], IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK)) { case IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454: diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 0ea7e77860b7..738de269e13f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1206,16 +1206,17 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) return; } - RCU_INIT_POINTER(local->monitor_sdata, NULL); - mutex_unlock(&local->iflist_mtx); - - synchronize_net(); - + clear_bit(SDATA_STATE_RUNNING, &sdata->state); ieee80211_link_release_channel(&sdata->deflink); if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) drv_remove_interface(local, sdata); + RCU_INIT_POINTER(local->monitor_sdata, NULL); + mutex_unlock(&local->iflist_mtx); + + synchronize_net(); + kfree(sdata); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f8d52b3b0d0e..36a9be9a66c8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4959,6 +4959,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, parse_params.start = bss_ies->data; parse_params.len = bss_ies->len; parse_params.bss = cbss; + parse_params.link_id = -1; bss_elems = ieee802_11_parse_elems_full(&parse_params); if (!bss_elems) { ret = false; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index cd318c1c67be..6da39c864f45 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -47,6 +47,9 @@ struct ieee80211_elems_parse { /* The EPCS Multi-Link element in the original elements */ const struct element *ml_epcs_elem; + bool multi_link_inner; + bool skip_vendor; + /* * scratch buffer that can be used for various element parsing related * tasks, e.g., element de-fragmentation etc. @@ -152,12 +155,11 @@ ieee80211_parse_extension_element(u32 *crc, switch (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - if (elems_parse->ml_basic_elem) { + if (elems_parse->multi_link_inner) { elems->parse_error |= IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC; break; } - elems_parse->ml_basic_elem = elem; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: elems_parse->ml_reconf_elem = elem; @@ -399,6 +401,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, IEEE80211_PARSE_ERR_BAD_ELEM_SIZE; break; case WLAN_EID_VENDOR_SPECIFIC: + if (elems_parse->skip_vendor) + break; + if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && pos[2] == 0xf2) { /* Microsoft OUI (00:50:F2) */ @@ -866,21 +871,36 @@ ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse, } } -static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, - struct ieee80211_elems_parse_params *params) +static const struct element * +ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, + struct ieee80211_elems_parse_params *params, + struct ieee80211_elems_parse_params *sub) { struct ieee802_11_elems *elems = &elems_parse->elems; struct ieee80211_mle_per_sta_profile *prof; - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .action = params->action, - .from_ap = params->from_ap, - .link_id = -1, - }; - ssize_t ml_len = elems->ml_basic_len; - const struct element *non_inherit = NULL; + const struct element *tmp; + ssize_t ml_len; const u8 *end; + if (params->mode < IEEE80211_CONN_MODE_EHT) + return NULL; + + for_each_element_extid(tmp, WLAN_EID_EXT_EHT_MULTI_LINK, + elems->ie_start, elems->total_len) { + const struct ieee80211_multi_link_elem *mle = + (void *)tmp->data + 1; + + if (!ieee80211_mle_size_ok(tmp->data + 1, tmp->datalen - 1)) + continue; + + if (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE) != + IEEE80211_ML_CONTROL_TYPE_BASIC) + continue; + + elems_parse->ml_basic_elem = tmp; + break; + } + ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem, elems->ie_start, elems->total_len, @@ -891,26 +911,26 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, WLAN_EID_FRAGMENT); if (ml_len < 0) - return; + return NULL; elems->ml_basic = (const void *)elems_parse->scratch_pos; elems->ml_basic_len = ml_len; elems_parse->scratch_pos += ml_len; if (params->link_id == -1) - return; + return NULL; ieee80211_mle_get_sta_prof(elems_parse, params->link_id); prof = elems->prof; if (!prof) - return; + return NULL; /* check if we have the 4 bytes for the fixed part in assoc response */ if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) { elems->prof = NULL; elems->sta_prof_len = 0; - return; + return NULL; } /* @@ -919,13 +939,17 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse, * the -1 is because the 'sta_info_len' is accounted to as part of the * per-STA profile, but not part of the 'u8 variable[]' portion. */ - sub.start = prof->variable + prof->sta_info_len - 1 + 4; + sub->start = prof->variable + prof->sta_info_len - 1 + 4; end = (const u8 *)prof + elems->sta_prof_len; - sub.len = end - sub.start; + sub->len = end - sub->start; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub.start, sub.len); - _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit); + sub->mode = params->mode; + sub->action = params->action; + sub->from_ap = params->from_ap; + sub->link_id = -1; + + return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub->start, sub->len); } static void @@ -973,15 +997,19 @@ ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse) struct ieee802_11_elems * ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) { + struct ieee80211_elems_parse_params sub = {}; struct ieee80211_elems_parse *elems_parse; - struct ieee802_11_elems *elems; const struct element *non_inherit = NULL; - u8 *nontransmitted_profile; - int nontransmitted_profile_len = 0; + struct ieee802_11_elems *elems; size_t scratch_len = 3 * params->len; + bool multi_link_inner = false; BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0); + /* cannot parse for both a specific link and non-transmitted BSS */ + if (WARN_ON(params->link_id >= 0 && params->bss)) + return NULL; + elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len), GFP_ATOMIC); if (!elems_parse) @@ -998,34 +1026,51 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) ieee80211_clear_tpe(&elems->tpe); ieee80211_clear_tpe(&elems->csa_tpe); - nontransmitted_profile = elems_parse->scratch_pos; - nontransmitted_profile_len = - ieee802_11_find_bssid_profile(params->start, params->len, - elems, params->bss, - nontransmitted_profile); - elems_parse->scratch_pos += nontransmitted_profile_len; - non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - nontransmitted_profile, - nontransmitted_profile_len); + /* + * If we're looking for a non-transmitted BSS then we cannot at + * the same time be looking for a second link as the two can only + * appear in the same frame carrying info for different BSSes. + * + * In any case, we only look for one at a time, as encoded by + * the WARN_ON above. + */ + if (params->bss) { + int nontx_len = + ieee802_11_find_bssid_profile(params->start, + params->len, + elems, params->bss, + elems_parse->scratch_pos); + sub.start = elems_parse->scratch_pos; + sub.mode = params->mode; + sub.len = nontx_len; + sub.action = params->action; + sub.link_id = params->link_id; + + /* consume the space used for non-transmitted profile */ + elems_parse->scratch_pos += nontx_len; + + non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub.start, nontx_len); + } else { + /* must always parse to get elems_parse->ml_basic_elem */ + non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params, + &sub); + multi_link_inner = true; + } + elems_parse->skip_vendor = + cfg80211_find_elem(WLAN_EID_VENDOR_SPECIFIC, + sub.start, sub.len); elems->crc = _ieee802_11_parse_elems_full(params, elems_parse, non_inherit); - /* Override with nontransmitted profile, if found */ - if (nontransmitted_profile_len) { - struct ieee80211_elems_parse_params sub = { - .mode = params->mode, - .start = nontransmitted_profile, - .len = nontransmitted_profile_len, - .action = params->action, - .link_id = params->link_id, - }; - + /* Override with nontransmitted/per-STA profile if found */ + if (sub.len) { + elems_parse->multi_link_inner = multi_link_inner; + elems_parse->skip_vendor = false; _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } - ieee80211_mle_parse_link(elems_parse, params); - ieee80211_mle_defrag_reconf(elems_parse); ieee80211_mle_defrag_epcs(elems_parse); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1e28efe4203c..0659ec892ec6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/jiffies.h> @@ -3329,8 +3329,8 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; } - if (!ether_addr_equal(mgmt->sa, sdata->deflink.u.mgd.bssid) || - !ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) { + if (!ether_addr_equal(mgmt->sa, sdata->vif.cfg.ap_addr) || + !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) { /* Not from the current AP or not associated yet. */ return; } @@ -3346,9 +3346,9 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, local->hw.extra_tx_headroom); resp = skb_put_zero(skb, 24); - memcpy(resp->da, mgmt->sa, ETH_ALEN); + memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(resp->sa, sdata->vif.addr, ETH_ALEN); - memcpy(resp->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f83268fa9f92..caa3d0236b5e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/module.h> @@ -1335,9 +1335,13 @@ static int _sta_info_move_state(struct sta_info *sta, sta->sta.addr, new_state); /* notify the driver before the actual changes so it can - * fail the transition + * fail the transition if the state is increasing. + * The driver is required not to fail when the transition + * is decreasing the state, so first, do all the preparation + * work and only then, notify the driver. */ - if (test_sta_flag(sta, WLAN_STA_INSERTED)) { + if (new_state > sta->sta_state && + test_sta_flag(sta, WLAN_STA_INSERTED)) { int err = drv_sta_state(sta->local, sta->sdata, sta, sta->sta_state, new_state); if (err) @@ -1413,6 +1417,16 @@ static int _sta_info_move_state(struct sta_info *sta, break; } + if (new_state < sta->sta_state && + test_sta_flag(sta, WLAN_STA_INSERTED)) { + int err = drv_sta_state(sta->local, sta->sdata, sta, + sta->sta_state, new_state); + + WARN_ONCE(err, + "Driver is not allowed to fail if the sta_state is transitioning down the list: %d\n", + err); + } + sta->sta_state = new_state; return 0; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index f6b631faf4f7..fdda14c08e2b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * utilities for mac80211 */ @@ -687,7 +687,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, unsigned int queues, bool drop) { - if (!local->ops->flush) + if (!local->ops->flush && !drop) return; /* @@ -714,7 +714,8 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, } } - drv_flush(local, sdata, queues, drop); + if (local->ops->flush) + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, IEEE80211_QUEUE_STOP_REASON_FLUSH, @@ -2192,8 +2193,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_reconfig_roc(local); /* Requeue all works */ - list_for_each_entry(sdata, &local->interfaces, list) - wiphy_work_queue(local->hw.wiphy, &sdata->work); + list_for_each_entry(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + wiphy_work_queue(local->hw.wiphy, &sdata->work); + } } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 21b7c3b280b4..ea1efef3572a 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -213,8 +213,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw) goto out_wq; } - hrtimer_init(&local->ifs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - local->ifs_timer.function = ieee802154_xmit_ifs_timer; + hrtimer_setup(&local->ifs_timer, ieee802154_xmit_ifs_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); wpan_phy_set_dev(local->phy, local->hw.parent); diff --git a/net/mctp/route.c b/net/mctp/route.c index 3f2bd65ff5e3..4c460160914f 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -332,8 +332,14 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) & MCTP_HDR_SEQ_MASK; if (!key->reasm_head) { - key->reasm_head = skb; - key->reasm_tailp = &(skb_shinfo(skb)->frag_list); + /* Since we're manipulating the shared frag_list, ensure it isn't + * shared with any other SKBs. + */ + key->reasm_head = skb_unshare(skb, GFP_ATOMIC); + if (!key->reasm_head) + return -ENOMEM; + + key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list); key->last_seq = this_seq; return 0; } diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 17165b86ce22..06c1897b685a 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -921,6 +921,114 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) __mctp_route_test_fini(test, dev, rt, sock); } +/* Input route to socket, using a fragmented message created from clones. + */ +static void mctp_test_route_input_cloned_frag(struct kunit *test) +{ + /* 5 packet fragments, forming 2 complete messages */ + const struct mctp_hdr hdrs[5] = { + RX_FRAG(FL_S, 0), + RX_FRAG(0, 1), + RX_FRAG(FL_E, 2), + RX_FRAG(FL_S, 0), + RX_FRAG(FL_E, 1), + }; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skb[5]; + struct sk_buff *rx_skb; + struct socket *sock; + size_t data_len; + u8 compare[100]; + u8 flat[100]; + size_t total; + void *p; + int rc; + + /* Arbitrary length */ + data_len = 3; + total = data_len + sizeof(struct mctp_hdr); + + __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + + /* Create a single skb initially with concatenated packets */ + skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total); + mctp_test_skb_set_dev(skb[0], dev); + memset(skb[0]->data, 0 * 0x11, skb[0]->len); + memcpy(skb[0]->data, &hdrs[0], sizeof(struct mctp_hdr)); + + /* Extract and populate packets */ + for (int i = 1; i < 5; i++) { + skb[i] = skb_clone(skb[i - 1], GFP_ATOMIC); + KUNIT_ASSERT_TRUE(test, skb[i]); + p = skb_pull(skb[i], total); + KUNIT_ASSERT_TRUE(test, p); + skb_reset_network_header(skb[i]); + memcpy(skb[i]->data, &hdrs[i], sizeof(struct mctp_hdr)); + memset(&skb[i]->data[sizeof(struct mctp_hdr)], i * 0x11, data_len); + } + for (int i = 0; i < 5; i++) + skb_trim(skb[i], total); + + /* SOM packets have a type byte to match the socket */ + skb[0]->data[4] = 0; + skb[3]->data[4] = 0; + + skb_dump("pkt1 ", skb[0], false); + skb_dump("pkt2 ", skb[1], false); + skb_dump("pkt3 ", skb[2], false); + skb_dump("pkt4 ", skb[3], false); + skb_dump("pkt5 ", skb[4], false); + + for (int i = 0; i < 5; i++) { + KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); + /* Take a reference so we can check refcounts at the end */ + skb_get(skb[i]); + } + + /* Feed the fragments into MCTP core */ + for (int i = 0; i < 5; i++) { + rc = mctp_route_input(&rt->rt, skb[i]); + KUNIT_EXPECT_EQ(test, rc, 0); + } + + /* Receive first reassembled message */ + rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, rx_skb->len, 3 * data_len); + rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); + for (int i = 0; i < rx_skb->len; i++) + compare[i] = (i / data_len) * 0x11; + /* Set type byte */ + compare[0] = 0; + + KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); + KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); + kfree_skb(rx_skb); + + /* Receive second reassembled message */ + rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, rx_skb->len, 2 * data_len); + rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len); + for (int i = 0; i < rx_skb->len; i++) + compare[i] = (i / data_len + 3) * 0x11; + /* Set type byte */ + compare[0] = 0; + + KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len); + KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1); + kfree_skb(rx_skb); + + /* Check input skb refcounts */ + for (int i = 0; i < 5; i++) { + KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1); + kfree_skb(skb[i]); + } + + __mctp_route_test_fini(test, dev, rt, sock); +} + #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, @@ -1144,6 +1252,7 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_packet_flow), KUNIT_CASE(mctp_test_fragment_flow), KUNIT_CASE(mctp_test_route_output_key_create), + KUNIT_CASE(mctp_test_route_input_cloned_frag), {} }; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index fd2de185bc93..23949ae2a3a8 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -651,6 +651,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * struct mptcp_sock *msk = mptcp_sk(subflow->conn); bool drop_other_suboptions = false; unsigned int opt_size = *size; + struct mptcp_addr_info addr; bool echo; int len; @@ -659,7 +660,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * */ if (!mptcp_pm_should_add_signal(msk) || (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) || - !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr, + !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr, &echo, &drop_other_suboptions)) return false; @@ -672,7 +673,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * else if (opts->suboptions & OPTION_MPTCP_DSS) return false; - len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port); + len = mptcp_add_addr_len(addr.family, echo, !!addr.port); if (remaining < len) return false; @@ -689,6 +690,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * opts->ahmac = 0; *size -= opt_size; } + opts->addr = addr; opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 572d160edca3..7868207c4e9d 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -977,7 +977,7 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, struct mptcp_pm_addr_entry *entry, - bool needs_id) + bool needs_id, bool replace) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -1017,6 +1017,17 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, if (entry->addr.id) goto out; + /* allow callers that only need to look up the local + * addr's id to skip replacement. This allows them to + * avoid calling synchronize_rcu in the packet recv + * path. + */ + if (!replace) { + kfree(entry); + ret = cur->addr.id; + goto out; + } + pernet->addrs--; entry->addr.id = cur->addr.id; list_del_rcu(&cur->list); @@ -1165,7 +1176,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); if (ret < 0) kfree(entry); @@ -1433,7 +1444,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) } } ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, - !mptcp_pm_has_addr_attr_id(attr, info)); + !mptcp_pm_has_addr_attr_id(attr, info), + true); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; @@ -1514,11 +1526,6 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, if (mptcp_pm_is_userspace(msk)) goto next; - if (list_empty(&msk->conn_list)) { - mptcp_pm_remove_anno_addr(msk, addr, false); - goto next; - } - lock_sock(sk); remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f6a207958459..ad21925af061 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1199,6 +1199,8 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } + if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) + return; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index fd021cf8286e..9f18217dddc8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1142,7 +1142,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (data_len == 0) { pr_debug("infinite mapping received\n"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); - subflow->map_data_len = 0; return MAPPING_INVALID; } @@ -1286,18 +1285,6 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss mptcp_schedule_work(sk); } -static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) -{ - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - - if (subflow->mp_join) - return false; - else if (READ_ONCE(msk->csum_enabled)) - return !subflow->valid_csum_seen; - else - return READ_ONCE(msk->allow_infinite_fallback); -} - static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1393,7 +1380,7 @@ fallback: return true; } - if (!subflow_can_fallback(subflow) && subflow->map_data_len) { + if (!READ_ONCE(msk->allow_infinite_fallback)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ @@ -1772,10 +1759,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, * needs it. * Update ns_tracker to current stack trace and refcounted tracker. */ - __netns_tracker_free(net, &sf->sk->ns_tracker, false); - sf->sk->sk_net_refcnt = 1; - get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sf->sk); err = tcp_set_ulp(sf->sk, "mptcp"); if (err) goto err_free; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7d13110ce188..0633276d96bf 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3091,12 +3091,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_SERVICES: { struct ip_vs_get_services *get; - int size; + size_t size; get = (struct ip_vs_get_services *)arg; size = struct_size(get, entrytable, get->num_services); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } @@ -3132,12 +3132,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_DESTS: { struct ip_vs_get_dests *get; - int size; + size_t size; get = (struct ip_vs_get_dests *)arg; size = struct_size(get, entrytable, get->num_dests); if (*len != size) { - pr_err("length: %u != %u\n", *len, size); + pr_err("length: %u != %zu\n", *len, size); ret = -EINVAL; goto out; } diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 4890af4dc263..913ede2f57f9 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; - if (time_is_after_eq_jiffies((unsigned long)list->last_gc)) + if ((u32)jiffies == list->last_gc) goto add_new_node; /* check the saved connections */ @@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net, bool ret = false; /* don't bother if we just did GC */ - if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc))) + if ((u32)jiffies == READ_ONCE(list->last_gc)) return false; /* don't bother if other cpu is already doing GC */ @@ -377,6 +377,8 @@ restart: conn->tuple = *tuple; conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 97c6eb8847a0..8cd4cf7ae211 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -381,10 +381,8 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; - if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) { - flow_offload_teardown(flow); + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) return 0; - } iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); thoff = (iph->ihl * 4) + ctx->offset; @@ -662,10 +660,8 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; - if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) { - flow_offload_teardown(flow); + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) return 0; - } ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); thoff = sizeof(*ip6h) + ctx->offset; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a34de9c17cf1..c2df81b7e950 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly; static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); -static LIST_HEAD(nf_tables_destroy_list); static LIST_HEAD(nf_tables_gc_list); static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); static DEFINE_SPINLOCK(nf_tables_gc_list_lock); @@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s table->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); -static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); static void nft_trans_gc_work(struct work_struct *work); static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); @@ -10006,11 +10004,12 @@ static void nft_commit_release(struct nft_trans *trans) static void nf_tables_trans_destroy_work(struct work_struct *w) { + struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work); struct nft_trans *trans, *next; LIST_HEAD(head); spin_lock(&nf_tables_destroy_list_lock); - list_splice_init(&nf_tables_destroy_list, &head); + list_splice_init(&nft_net->destroy_list, &head); spin_unlock(&nf_tables_destroy_list_lock); if (list_empty(&head)) @@ -10024,9 +10023,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) } } -void nf_tables_trans_destroy_flush_work(void) +void nf_tables_trans_destroy_flush_work(struct net *net) { - flush_work(&trans_destroy_work); + struct nftables_pernet *nft_net = nft_pernet(net); + + flush_work(&nft_net->destroy_work); } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); @@ -10484,11 +10485,11 @@ static void nf_tables_commit_release(struct net *net) trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); - schedule_work(&trans_destroy_work); + schedule_work(&nft_net->destroy_work); mutex_unlock(&nft_net->commit_mutex); } @@ -11853,7 +11854,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && @@ -11895,6 +11896,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->destroy_list); INIT_LIST_HEAD(&nft_net->commit_set_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); @@ -11903,6 +11905,7 @@ static int __net_init nf_tables_init_net(struct net *net) nft_net->base_seq = 1; nft_net->gc_seq = 0; nft_net->validate_state = NFT_VALIDATE_SKIP; + INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); return 0; } @@ -11931,14 +11934,17 @@ static void __net_exit nf_tables_exit_net(struct net *net) if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); + cancel_work_sync(&nft_net->destroy_work); __nft_release_tables(net); nft_gc_seq_end(nft_net, gc_seq); mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); WARN_ON_ONCE(!list_empty(&nft_net->module_list)); WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); + WARN_ON_ONCE(!list_empty(&nft_net->destroy_list)); } static void nf_tables_exit_batch(struct list_head *net_exit_list) @@ -12029,10 +12035,8 @@ static void __exit nf_tables_module_exit(void) unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); - nf_tables_trans_destroy_flush_work(); unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_gc_work); - cancel_work_sync(&trans_destroy_work); rcu_barrier(); rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7ca4f0d21fe2..72711d62fddf 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return 0; } -static void nft_compat_wait_for_destructors(void) +static void nft_compat_wait_for_destructors(struct net *net) { /* xtables matches or targets can have side effects, e.g. * creation/destruction of /proc files. @@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void) * work queue. If we have pending invocations we thus * need to wait for those to finish. */ - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(net); } static int @@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) { @@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); - nft_compat_wait_for_destructors(); + nft_compat_wait_for_destructors(ctx->net); return xt_check_match(&par, size, proto, inv); } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 2e59aba681a1..d526e69a2a2b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, enum ip_conntrack_info ctinfo; u16 value = nft_reg_load16(®s->data[priv->sreg]); struct nf_conn *ct; + int oldcnt; ct = nf_ct_get(skb, &ctinfo); if (ct) /* already tracked */ @@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); - if (likely(refcount_read(&ct->ct_general.use) == 1)) { - refcount_inc(&ct->ct_general.use); + __refcount_inc(&ct->ct_general.use, &oldcnt); + if (likely(oldcnt == 1)) { nf_ct_zone_add(ct, &zone); } else { + refcount_dec(&ct->ct_general.use); /* previous skb got queued to userspace, allocate temporary * one until percpu template can be reused. */ diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index b8d03364566c..c74012c99125 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; struct iphdr *iph, _iph; - unsigned int start; bool found = false; __be32 info; int optlen; @@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (!iph) return -EBADMSG; - start = sizeof(struct iphdr); optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); if (optlen <= 0) @@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, /* Copy the options since __ip_options_compile() modifies * the options. */ - if (skb_copy_bits(skb, start, opt->__data, optlen)) + if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen)) return -EBADMSG; opt->optlen = optlen; @@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb, found = target == IPOPT_SSRR ? opt->is_strictroute : !opt->is_strictroute; if (found) - *offset = opt->srr + start; + *offset = opt->srr; break; case IPOPT_RR: if (!opt->rr) break; - *offset = opt->rr + start; + *offset = opt->rr; found = true; break; case IPOPT_RA: if (!opt->router_alert) break; - *offset = opt->router_alert + start; + *offset = opt->router_alert; found = true; break; default: diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 85311226183a..a53ea60d0a78 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -795,16 +795,6 @@ static int netlink_release(struct socket *sock) sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); - /* Because struct net might disappear soon, do not keep a pointer. */ - if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - /* Because of deferred_put_nlk_sk and use of work queue, - * it is possible netns will be freed before this socket. - */ - sock_net_set(sk, &init_net); - __netns_tracker_alloc(&init_net, &sk->ns_tracker, - false, GFP_KERNEL); - } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3bb4810234aa..e573e9221302 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1368,8 +1368,11 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) attr == OVS_KEY_ATTR_CT_MARK) return true; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - attr == OVS_KEY_ATTR_CT_LABELS) - return true; + attr == OVS_KEY_ATTR_CT_LABELS) { + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + + return ovs_net->xt_label; + } return false; } @@ -1378,7 +1381,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) { - unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_conntrack_info ct_info; const char *helper = NULL; u16 family; @@ -1407,12 +1409,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, return -ENOMEM; } - if (nf_connlabels_get(net, n_bits - 1)) { - nf_ct_tmpl_free(ct_info.ct); - OVS_NLERR(log, "Failed to set connlabel length"); - return -EOPNOTSUPP; - } - if (ct_info.timeout[0]) { if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto, ct_info.timeout)) @@ -1581,7 +1577,6 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) if (ct_info->ct) { if (ct_info->timeout[0]) nf_ct_destroy_timeout(ct_info->ct); - nf_connlabels_put(nf_ct_net(ct_info->ct)); nf_ct_tmpl_free(ct_info->ct); } } @@ -2006,9 +2001,17 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { int ovs_ct_init(struct net *net) { -#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) + unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + if (nf_connlabels_get(net, n_bits - 1)) { + ovs_net->xt_label = false; + OVS_NLERR(true, "Failed to set connlabel length"); + } else { + ovs_net->xt_label = true; + } + +#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) return ovs_ct_limit_init(net, ovs_net); #else return 0; @@ -2017,9 +2020,12 @@ int ovs_ct_init(struct net *net) void ovs_ct_exit(struct net *net) { -#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_net *ovs_net = net_generic(net, ovs_net_id); +#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) ovs_ct_limit_exit(net, ovs_net); #endif + + if (ovs_net->xt_label) + nf_connlabels_put(net); } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 225f6048867f..5d548eda742d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2101,6 +2101,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; + struct net *net_vport; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, @@ -2117,12 +2118,15 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; - if (!net_eq(net, dev_net(vport->dev))) { - int id = peernet2id_alloc(net, dev_net(vport->dev), gfp); + rcu_read_lock(); + net_vport = dev_net_rcu(vport->dev); + if (!net_eq(net, net_vport)) { + int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC); if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) - goto nla_put_failure; + goto nla_put_failure_unlock; } + rcu_read_unlock(); ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, @@ -2143,6 +2147,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, genlmsg_end(skb, ovs_header); return 0; +nla_put_failure_unlock: + rcu_read_unlock(); nla_put_failure: err = -EMSGSIZE; error: diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 365b9bb7f546..9ca6231ea647 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -160,6 +160,9 @@ struct ovs_net { #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif + + /* Module reference for configuring conntrack. */ + bool xt_label; }; /** diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 881ddd3696d5..95e0dd14dc1a 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2317,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) OVS_FLOW_ATTR_MASK, true, skb); } -#define MAX_ACTIONS_BUFSIZE (32 * 1024) - static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); - sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); @@ -2480,15 +2476,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) { - OVS_NLERR(log, "Flow action size exceeds max %u", - MAX_ACTIONS_BUFSIZE); - return ERR_PTR(-EMSGSIZE); - } - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return ERR_CAST(acts); @@ -3545,7 +3532,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int err; u32 mpls_label_count = 0; - *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); + *sfa = nla_alloc_flow_actions(nla_len(attr)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 0581c53e6517..3cc2f303bf78 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -504,12 +504,8 @@ bool rds_tcp_tune(struct socket *sock) release_sock(sk); return false; } - /* Update ns_tracker to current stack trace and refcounted tracker */ - __netns_tracker_free(net, &sk->ns_tracker, false); - - sk->sk_net_refcnt = 1; - netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); + put_net(net); } rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 72c65d938a15..a4a668b88a8f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev; ax25_address *source; ax25_uid_assoc *user; + int err = -EINVAL; int n; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; @@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; - if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) - return -EADDRNOTAVAIL; + lock_sock(sk); + + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_release; + + err = -EADDRNOTAVAIL; + dev = rose_dev_get(&addr->srose_addr); + if (!dev) + goto out_release; source = &addr->srose_call; @@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); - return -EACCES; + err = -EACCES; + goto out_release; } rose->source_call = *source; } @@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rose_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); - - return 0; + err = 0; +out_release: + release_sock(sk); + return err; } static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 718193df9d2e..3cc3af15086f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -327,8 +327,8 @@ struct rxrpc_local { * packet with a maximum set of jumbo subpackets or a PING ACK padded * out to 64K with zeropages for PMTUD. */ - struct kvec kvec[RXRPC_MAX_NR_JUMBO > 3 + 16 ? - RXRPC_MAX_NR_JUMBO : 3 + 16]; + struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? + 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; }; /* @@ -344,6 +344,7 @@ struct rxrpc_peer { struct hlist_head error_targets; /* targets for net error distribution */ struct rb_root service_conns; /* Service connections */ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ + unsigned long app_data; /* Application data (e.g. afs_server) */ time64_t last_tx_at; /* Last time packet sent here */ seqlock_t service_conn_lock; spinlock_t lock; /* access lock */ @@ -360,7 +361,6 @@ struct rxrpc_peer { u8 pmtud_jumbo; /* Max jumbo packets for the MTU */ bool ackr_adv_pmtud; /* T if the peer advertises path-MTU */ unsigned int ackr_max_data; /* Maximum data advertised by peer */ - seqcount_t mtu_lock; /* Lockless MTU access management */ unsigned int if_mtu; /* Local interface MTU (- hdrsize) for this peer */ unsigned int max_data; /* Maximum packet data capacity for this peer */ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ @@ -582,6 +582,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ + RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */ }; /* @@ -602,7 +603,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ - RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ @@ -874,8 +874,7 @@ struct rxrpc_txbuf { #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ __be16 cksum; /* Checksum to go in header */ bool jumboable; /* Can be non-terminal jumbo subpacket */ - u8 nr_kvec; /* Amount of kvec[] used */ - struct kvec kvec[1]; + void *data; /* Data with preceding jumbo header */ }; static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 5a543c3f6fb0..c4c8b46a68c6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", - [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags); break; case RXRPC_CONN_SERVICE: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; case RXRPC_CONN_ABORTED: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 713e04394ceb..4d9c5e21ba78 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -228,10 +228,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) rxrpc_notify_socket(call); - } } /* @@ -272,6 +270,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, * we've already received the packet, put it on the * front of the queue. */ + sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured); skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); skb_queue_head(&conn->local->rx_queue, skb); @@ -437,14 +436,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); - switch (skb->mark) { - case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: - if (conn->state != RXRPC_CONN_SERVICE) - break; + if (skb) { + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure(conn->channels[loop].call); - break; + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } } /* Process delayed ACKs whose time has come. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 7eba4d7d9a38..2f1fd1e2e7e4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4974b5accafa..24aceb183c2c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -448,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - skb_queue_tail(&call->recvmsg_queue, skb); + spin_lock_irq(&call->recvmsg_queue.lock); + + __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) + /* Change the state inside the lock so that recvmsg syncs + * correctly with it and using sendmsg() to send a reply + * doesn't race. + */ rxrpc_end_rx_phase(call, sp->hdr.serial); + + spin_unlock_irq(&call->recvmsg_queue.lock); } /* @@ -657,7 +665,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb rxrpc_propose_delay_ACK(call, sp->hdr.serial, rxrpc_propose_ack_input_data); } - if (notify) { + if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); } @@ -802,9 +810,7 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb if (max_mtu < peer->max_data) { trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_mtu, rxrpc_pmtud_reduce_ack); - write_seqcount_begin(&peer->mtu_lock); peer->max_data = max_mtu; - write_seqcount_end(&peer->mtu_lock); } max_data = umin(max_mtu, peer->max_data); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 6f7a125d6e90..95905b85a8d7 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -428,13 +428,13 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_send_data_req *req, struct rxrpc_txbuf *txb, + struct rxrpc_wire_header *whdr, rxrpc_serial_t serial, int subpkt) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo); + struct rxrpc_jumbo_header *jumbo = txb->data - sizeof(*jumbo); enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; - struct kvec *kv = &call->local->kvec[subpkt]; + struct kvec *kv = &call->local->kvec[1 + subpkt]; size_t len = txb->pkt_len; bool last; u8 flags; @@ -491,18 +491,15 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, } dont_set_request_ack: - /* The jumbo header overlays the wire header in the txbuf. */ + /* There's a jumbo header prepended to the data if we need it. */ if (subpkt < req->n - 1) flags |= RXRPC_JUMBO_PACKET; else flags &= ~RXRPC_JUMBO_PACKET; if (subpkt == 0) { whdr->flags = flags; - whdr->serial = htonl(txb->serial); whdr->cksum = txb->cksum; - whdr->serviceId = htons(conn->service_id); - kv->iov_base = whdr; - len += sizeof(*whdr); + kv->iov_base = txb->data; } else { jumbo->flags = flags; jumbo->pad = 0; @@ -535,7 +532,9 @@ static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq, /* * Prepare a (jumbo) packet for transmission. */ -static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) +static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + struct rxrpc_wire_header *whdr) { struct rxrpc_txqueue *tq = req->tq; rxrpc_serial_t serial; @@ -549,6 +548,18 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se /* Each transmission of a Tx packet needs a new serial number */ serial = rxrpc_get_next_serials(call->conn, req->n); + whdr->epoch = htonl(call->conn->proto.epoch); + whdr->cid = htonl(call->cid); + whdr->callNumber = htonl(call->call_id); + whdr->seq = htonl(seq); + whdr->serial = htonl(serial); + whdr->type = RXRPC_PACKET_TYPE_DATA; + whdr->flags = 0; + whdr->userStatus = 0; + whdr->securityIndex = call->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(call->conn->service_id); + call->tx_last_serial = serial + req->n - 1; call->tx_last_sent = req->now; xmit_ts = rxrpc_prepare_txqueue(tq, req); @@ -576,7 +587,7 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se if (i + 1 == req->n) /* Only sample the last subpacket in a jumbo. */ __set_bit(ix, &tq->rtt_samples); - len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); + len += rxrpc_prepare_data_subpacket(call, req, txb, whdr, serial, i); serial++; seq++; i++; @@ -618,6 +629,7 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } rxrpc_set_keepalive(call, req->now); + page_frag_free(whdr); return len; } @@ -626,25 +638,33 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se */ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) { + struct rxrpc_wire_header *whdr; struct rxrpc_connection *conn = call->conn; enum rxrpc_tx_point frag; struct rxrpc_txqueue *tq = req->tq; struct rxrpc_txbuf *txb; struct msghdr msg; rxrpc_seq_t seq = req->seq; - size_t len; + size_t len = sizeof(*whdr); bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); int ret, stat_ix; _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1); + whdr = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr), GFP_NOFS); + if (!whdr) + return; /* Drop the packet if no memory. */ + + call->local->kvec[0].iov_base = whdr; + call->local->kvec[0].iov_len = sizeof(*whdr); + stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1; atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]); - len = rxrpc_prepare_data_packet(call, req); + len += rxrpc_prepare_data_packet(call, req, whdr); txb = tq->bufs[seq & RXRPC_TXQ_MASK]; - iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, req->n, len); + iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1 + req->n, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -695,13 +715,13 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req if (ret == -EMSGSIZE) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize); - trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag); + trace_rxrpc_tx_packet(call->debug_id, whdr, frag); ret = 0; } else if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag); } else { - trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag); + trace_rxrpc_tx_packet(call->debug_id, whdr, frag); } rxrpc_tx_backoff(call, ret); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index e874c31fa901..7f4729234957 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -130,9 +130,7 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) peer->pmtud_bad = max_data + 1; trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp); - write_seqcount_begin(&peer->mtu_lock); peer->max_data = max_data; - write_seqcount_end(&peer->mtu_lock); } } @@ -169,6 +167,13 @@ void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb) goto out; } + if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 && + serr->ee.ee_type == ICMPV6_PKT_TOOBIG && + serr->ee.ee_code == 0)) { + rxrpc_adjust_mtu(peer, serr->ee.ee_info); + goto out; + } + rxrpc_store_error(peer, skb); out: rxrpc_put_peer(peer, rxrpc_peer_put_input_error); @@ -401,13 +406,8 @@ void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t a } max_data = umin(max_data, peer->ackr_max_data); - if (max_data != peer->max_data) { - preempt_disable(); - write_seqcount_begin(&peer->mtu_lock); + if (max_data != peer->max_data) peer->max_data = max_data; - write_seqcount_end(&peer->mtu_lock); - preempt_enable(); - } jumbo = max_data + sizeof(struct rxrpc_jumbo_header); jumbo /= RXRPC_JUMBO_SUBPKTLEN; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 0fcc87f0409f..71b6e07bf161 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -235,7 +235,6 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); - seqcount_init(&peer->mtu_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); peer->recent_srtt_us = UINT_MAX; peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW; @@ -325,10 +324,10 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) hash_key = rxrpc_peer_hash_key(local, &peer->srx); rxrpc_init_peer(local, peer, hash_key); - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); } /* @@ -462,7 +461,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) continue; hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) { - pr_err("Leaked peer %u {%u} %pISp\n", + pr_err("Leaked peer %x {%u} %pISp\n", peer->debug_id, refcount_read(&peer->ref), &peer->srx.transport); @@ -479,7 +478,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) */ struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call) { - return call->peer; + return rxrpc_get_peer(call->peer, rxrpc_peer_get_application); } EXPORT_SYMBOL(rxrpc_kernel_get_call_peer); @@ -521,3 +520,29 @@ const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer) (peer ? &peer->srx.transport : &rxrpc_null_addr.transport); } EXPORT_SYMBOL(rxrpc_kernel_remote_addr); + +/** + * rxrpc_kernel_set_peer_data - Set app-specific data on a peer. + * @peer: The peer to alter + * @app_data: The data to set + * + * Set the app-specific data on a peer. AF_RXRPC makes no effort to retain + * anything the data might refer to. The previous app_data is returned. + */ +unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data) +{ + return xchg(&peer->app_data, app_data); +} +EXPORT_SYMBOL(rxrpc_kernel_set_peer_data); + +/** + * rxrpc_kernel_get_peer_data - Get app-specific data from a peer. + * @peer: The peer to query + * + * Retrieve the app-specific data from a peer. + */ +unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer) +{ + return peer->app_data; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer_data); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 62b09d23ec08..6cb37b0eb77f 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -257,8 +257,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, struct rxrpc_txbuf *txb, struct skcipher_request *req) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxkad_level1_hdr *hdr = (void *)(whdr + 1); + struct rxkad_level1_hdr *hdr = txb->data; struct rxrpc_crypt iv; struct scatterlist sg; size_t pad; @@ -274,7 +273,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, pad = RXKAD_ALIGN - pad; pad &= RXKAD_ALIGN - 1; if (pad) { - memset(txb->kvec[0].iov_base + txb->offset, 0, pad); + memset(txb->data + txb->offset, 0, pad); txb->pkt_len += pad; } @@ -300,8 +299,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct skcipher_request *req) { const struct rxrpc_key_token *token; - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1); + struct rxkad_level2_hdr *rxkhdr = txb->data; struct rxrpc_crypt iv; struct scatterlist sg; size_t content, pad; @@ -319,7 +317,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, txb->pkt_len = round_up(content, RXKAD_ALIGN); pad = txb->pkt_len - content; if (pad) - memset(txb->kvec[0].iov_base + txb->offset, 0, pad); + memset(txb->data + txb->offset, 0, pad); /* encrypt from the session key */ token = call->conn->key->payload.data[0]; @@ -407,9 +405,8 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) /* Clear excess space in the packet */ if (txb->pkt_len < txb->alloc_size) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; size_t gap = txb->alloc_size - txb->pkt_len; - void *p = whdr + 1; + void *p = txb->data; memset(p + txb->pkt_len, 0, gap); } diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index 7ef93407be83..e848a4777b8c 100644 --- a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -478,6 +478,18 @@ static int rxperf_deliver_request(struct rxperf_call *call) call->unmarshal++; fallthrough; case 2: + ret = rxperf_extract_data(call, true); + if (ret < 0) + return ret; + + /* Deal with the terminal magic cookie. */ + call->iov_len = 4; + call->kvec[0].iov_len = call->iov_len; + call->kvec[0].iov_base = call->tmp; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len); + call->unmarshal++; + fallthrough; + case 3: ret = rxperf_extract_data(call, false); if (ret < 0) return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0e8da909d4f2..84dc6c94f23b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -419,7 +419,7 @@ reload: size_t copy = umin(txb->space, msg_data_left(msg)); _debug("add %zu", copy); - if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset, + if (!copy_from_iter_full(txb->data + txb->offset, copy, &msg->msg_iter)) goto efault; _debug("added"); @@ -445,8 +445,6 @@ reload: ret = call->security->secure_packet(call, txb); if (ret < 0) goto out; - - txb->kvec[0].iov_len += txb->len; rxrpc_queue_packet(rx, call, txb, notify_end_tx); txb = NULL; } @@ -707,7 +705,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: - case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_RECV_REQUEST: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 131d9e55c8e9..c550991d48fa 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -19,17 +19,19 @@ atomic_t rxrpc_nr_txbuf; struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, size_t data_align, gfp_t gfp) { - struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; - size_t total, hoff; + size_t total, doff, jsize = sizeof(struct rxrpc_jumbo_header); void *buf; txb = kzalloc(sizeof(*txb), gfp); if (!txb) return NULL; - hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); - total = hoff + sizeof(*whdr) + data_size; + /* We put a jumbo header in the buffer, but not a full wire header to + * avoid delayed-corruption problems with zerocopy. + */ + doff = round_up(jsize, data_align); + total = doff + data_size; data_align = umax(data_align, L1_CACHE_BYTES); mutex_lock(&call->conn->tx_data_alloc_lock); @@ -41,30 +43,15 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ return NULL; } - whdr = buf + hoff; - refcount_set(&txb->ref, 1); txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); txb->alloc_size = data_size; txb->space = data_size; - txb->offset = sizeof(*whdr); + txb->offset = 0; txb->flags = call->conn->out_clientflag; txb->seq = call->send_top + 1; - txb->nr_kvec = 1; - txb->kvec[0].iov_base = whdr; - txb->kvec[0].iov_len = sizeof(*whdr); - - whdr->epoch = htonl(call->conn->proto.epoch); - whdr->cid = htonl(call->cid); - whdr->callNumber = htonl(call->call_id); - whdr->seq = htonl(txb->seq); - whdr->type = RXRPC_PACKET_TYPE_DATA; - whdr->flags = 0; - whdr->userStatus = 0; - whdr->securityIndex = call->security_ix; - whdr->_rsvd = 0; - whdr->serviceId = htons(call->dest_srx.srx_service); + txb->data = buf + doff; trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, rxrpc_txbuf_alloc_data); @@ -90,14 +77,10 @@ void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb) { - int i; - trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, rxrpc_txbuf_free); - for (i = 0; i < txb->nr_kvec; i++) - if (txb->kvec[i].iov_base && - !is_zero_pfn(page_to_pfn(virt_to_page(txb->kvec[i].iov_base)))) - page_frag_free(txb->kvec[i].iov_base); + if (txb->data) + page_frag_free(txb->data); kfree(txb); atomic_dec(&rxrpc_nr_txbuf); } diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 91c0ec729823..c1f75f272757 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -287,8 +287,7 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, gact->param.tcfg_basetime = basetime; gact->param.tcfg_clockid = clockid; gact->tk_offset = tko; - hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); - gact->hitimer.function = gate_timer_func; + hrtimer_setup(&gact->hitimer, gate_timer_func, clockid, HRTIMER_MODE_ABS_SOFT); } static int tcf_gate_init(struct net *net, struct nlattr *nla, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8e47e5355be6..4f648af8cfaa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -97,7 +97,7 @@ tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base, n, xa_limit_32b, &next, GFP_KERNEL); - if (err) + if (err < 0) goto err_xa_alloc; exts->miss_cookie_node = n; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e3e91cf867eb..0b102a0ca83d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -619,8 +619,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, clockid_t clockid) { - hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED); - wd->timer.function = qdisc_watchdog; + hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED); wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init_clockid); @@ -2254,6 +2253,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, return -EOPNOTSUPP; } + /* Prevent creation of traffic classes with classid TC_H_ROOT */ + if (clid == TC_H_ROOT) { + NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); + return -EINVAL; + } + new_cl = cl; err = -EOPNOTSUPP; if (cops->change) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b50b2c2cc09b..e6bfd39ff339 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; + if (unlikely(READ_ONCE(sch->limit) == 0)) + return qdisc_drop(skb, sch, to_free); + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index ab6234b4fcd5..532fde548b88 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch) for (i = 0; i < table->DPs; i++) gred_destroy_vq(table->tab[i]); - gred_offload(sch, TC_GRED_DESTROY); + if (table->opt) + gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 71ec9986ed37..fdd79d3ccd8c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -749,9 +749,9 @@ deliver: if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, pkt_len); sch->qstats.backlog -= pkt_len; sch->q.qlen--; + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a68e17891b0b..14021b812329 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1932,8 +1932,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (!TXTIME_ASSIST_IS_ENABLED(q->flags) && !FULL_OFFLOAD_IS_ENABLED(q->flags) && !hrtimer_active(&q->advance_timer)) { - hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); - q->advance_timer.function = advance_sched; + hrtimer_setup(&q->advance_timer, advance_sched, q->clockid, HRTIMER_MODE_ABS); } err = taprio_get_start_time(sch, new_admin, &start); @@ -2056,8 +2055,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, spin_lock_init(&q->current_entry_lock); - hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); - q->advance_timer.function = advance_sched; + hrtimer_setup(&q->advance_timer, advance_sched, CLOCK_TAI, HRTIMER_MODE_ABS); q->root = sch; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index c241cc552e8d..bfcff6d6a438 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -735,7 +735,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( * value SHOULD be the smallest TSN not acknowledged by the * receiver of the request plus 2^31. */ - init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31); + init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1U << 31); sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ca6984541edb..3e6cb35baf25 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3337,10 +3337,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) * which need net ref. */ sk = smc->clcsock->sk; - __netns_tracker_free(net, &sk->ns_tracker, false); - sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); return 0; } diff --git a/net/socket.c b/net/socket.c index 262a28b59c7f..28bae5a94234 100644 --- a/net/socket.c +++ b/net/socket.c @@ -479,6 +479,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) sock->file = file; file->private_data = sock; stream_open(SOCK_INODE(sock), file); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return file; } EXPORT_SYMBOL(sock_alloc_file); diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 8299ceb3e373..95696f42647e 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -347,7 +347,10 @@ static int strp_read_sock(struct strparser *strp) struct socket *sock = strp->sk->sk_socket; read_descriptor_t desc; - if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) + if (unlikely(!sock || !sock->ops)) + return -EBUSY; + + if (unlikely(!strp->cb.read_sock && !sock->ops->read_sock)) return -EBUSY; desc.arg.data = strp; @@ -355,7 +358,10 @@ static int strp_read_sock(struct strparser *strp) desc.count = 1; /* give more than one skb per call */ /* sk should be locked here, so okay to do read_sock */ - sock->ops->read_sock(strp->sk, &desc, strp_recv); + if (strp->cb.read_sock) + strp->cb.read_sock(strp, &desc, strp_recv); + else + sock->ops->read_sock(strp->sk, &desc, strp_recv); desc.error = strp->cb.read_sock_done(strp, desc.error); @@ -468,6 +474,7 @@ int strp_init(struct strparser *strp, struct sock *sk, strp->cb.unlock = cb->unlock ? : strp_sock_unlock; strp->cb.rcv_msg = cb->rcv_msg; strp->cb.parse_msg = cb->parse_msg; + strp->cb.read_sock = cb->read_sock; strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index cb279eb9ac4b..7ce5e28a6c03 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1674,12 +1674,14 @@ static void remove_cache_proc_entries(struct cache_detail *cd) } } -#ifdef CONFIG_PROC_FS static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { struct proc_dir_entry *p; struct sunrpc_net *sn; + if (!IS_ENABLED(CONFIG_PROC_FS)) + return 0; + sn = net_generic(net, sunrpc_net_id); cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc); if (cd->procfs == NULL) @@ -1707,12 +1709,6 @@ out_nomem: remove_cache_proc_entries(cd); return -ENOMEM; } -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) -{ - return 0; -} -#endif void __init cache_initialize(void) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cef623ea1506..9b45fbdc90ca 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -864,8 +864,6 @@ void rpc_signal_task(struct rpc_task *task) if (!rpc_task_set_rpc_status(task, -ERESTARTSYS)) return; trace_rpc_task_signalled(task, task->tk_action); - set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); - smp_mb__after_atomic(); queue = READ_ONCE(task->tk_waitqueue); if (queue) rpc_wake_up_queued_task(queue, task); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cb3bd12f5818..72e5a01df3d3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1541,10 +1541,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, newlen = error; if (protocol == IPPROTO_TCP) { - __netns_tracker_free(net, &sock->sk->ns_tracker, false); - sock->sk->sk_net_refcnt = 1; - get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sock->sk); if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c60936d8cef7..83cc095846d3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1941,12 +1941,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, goto out; } - if (protocol == IPPROTO_TCP) { - __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false); - sock->sk->sk_net_refcnt = 1; - get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(xprt->xprt_net, 1); - } + if (protocol == IPPROTO_TCP) + sk_net_refcnt_upgrade(sock->sk); filp = sock_alloc_file(sock, O_NONBLOCK, NULL); if (IS_ERR(filp)) @@ -2581,7 +2577,15 @@ static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) struct sock_xprt *lower_transport = container_of(lower_xprt, struct sock_xprt, xprt); - lower_transport->xprt_err = status ? -EACCES : 0; + switch (status) { + case 0: + case -EACCES: + case -ETIMEDOUT: + lower_transport->xprt_err = status; + break; + default: + lower_transport->xprt_err = -EACCES; + } complete(&lower_transport->handshake_done); xprt_put(lower_xprt); } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 6488ead9e464..4d5fbacef496 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -472,7 +472,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev, EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); -static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); +static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain); /** * register_switchdev_notifier - Register notifier @@ -518,17 +518,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers); int register_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; + + rtnl_lock(); + err = raw_notifier_chain_register(chain, nb); + rtnl_unlock(); - return blocking_notifier_chain_register(chain, nb); + return err; } EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier); int unregister_switchdev_blocking_notifier(struct notifier_block *nb) { - struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain; + struct raw_notifier_head *chain = &switchdev_blocking_notif_chain; + int err; - return blocking_notifier_chain_unregister(chain, nb); + rtnl_lock(); + err = raw_notifier_chain_unregister(chain, nb); + rtnl_unlock(); + + return err; } EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier); @@ -536,10 +546,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info, struct netlink_ext_ack *extack) { + ASSERT_RTNL(); info->dev = dev; info->extack = extack; - return blocking_notifier_call_chain(&switchdev_blocking_notif_chain, - val, info); + return raw_notifier_call_chain(&switchdev_blocking_notif_chain, + val, info); } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 34945de1fb1f..f0e613d97664 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2102,6 +2102,7 @@ restart_locked: goto out_sock_put; } + sock_put(other); goto lookup; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 075695173648..7e3db87ae433 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -824,13 +824,19 @@ static void __vsock_release(struct sock *sk, int level) */ lock_sock_nested(sk, level); - sock_orphan(sk); + /* Indicate to vsock_remove_sock() that the socket is being released and + * can be removed from the bound_table. Unlike transport reassignment + * case, where the socket must remain bound despite vsock_remove_sock() + * being called from the transport release() callback. + */ + sock_set_flag(sk, SOCK_DEAD); if (vsk->transport) vsk->transport->release(vsk); else if (sock_type_connectible(sk->sk_type)) vsock_remove_sock(vsk); + sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(&sk->sk_receive_queue); @@ -1183,6 +1189,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor) { struct vsock_sock *vsk = vsock_sk(sk); + if (WARN_ON_ONCE(!vsk->transport)) + return -ENODEV; + return vsk->transport->read_skb(vsk, read_actor); } diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index b58c3818f284..f0e48e6911fc 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) }; int ret; + mutex_lock(&vsock->rx_lock); + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + mutex_unlock(&vsock->rx_lock); + + atomic_set(&vsock->queued_replies, 0); + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL); if (ret < 0) return ret; @@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - atomic_set(&vsock->queued_replies, 0); mutex_init(&vsock->tx_lock); mutex_init(&vsock->rx_lock); diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index f201d9eca1df..07b96d56f3a5 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -87,7 +87,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, lock_sock(sk); vsk = vsock_sk(sk); - if (!vsk->transport) { + if (WARN_ON_ONCE(!vsk->transport)) { copied = -ENODEV; goto out; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 12b780de8779..828e29872633 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1191,6 +1191,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) { struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; + unsigned long flags; + + spin_lock_irqsave(&rdev->wiphy_work_lock, flags); + WARN_ON(!list_empty(&rdev->wiphy_work_list)); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + cancel_work_sync(&rdev->wiphy_work); + rfkill_destroy(rdev->wiphy.rfkill); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(®->list); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7d3da0f6833..aac0e7298dc7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4220,6 +4220,11 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) if (flags[flag]) *mntrflags |= (1<<flag); + /* cooked monitor mode is incompatible with other modes */ + if (*mntrflags & MONITOR_FLAG_COOK_FRAMES && + *mntrflags != MONITOR_FLAG_COOK_FRAMES) + return -EOPNOTSUPP; + *mntrflags |= MONITOR_FLAG_CHANGED; return 0; @@ -11118,6 +11123,7 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device static int nl80211_process_links(struct cfg80211_registered_device *rdev, struct cfg80211_assoc_link *links, + int assoc_link_id, const u8 *ssid, int ssid_len, struct genl_info *info) { @@ -11148,7 +11154,7 @@ static int nl80211_process_links(struct cfg80211_registered_device *rdev, } links[link_id].bss = nl80211_assoc_bss(rdev, ssid, ssid_len, attrs, - link_id, link_id); + assoc_link_id, link_id); if (IS_ERR(links[link_id].bss)) { err = PTR_ERR(links[link_id].bss); links[link_id].bss = NULL; @@ -11345,8 +11351,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); ap_addr = req.ap_mld_addr; - err = nl80211_process_links(rdev, req.links, ssid, ssid_len, - info); + err = nl80211_process_links(rdev, req.links, req.link_id, + ssid, ssid_len, info); if (err) goto free; @@ -16501,7 +16507,10 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) add_links = 0; if (info->attrs[NL80211_ATTR_MLO_LINKS]) { - err = nl80211_process_links(rdev, links, NULL, 0, info); + err = nl80211_process_links(rdev, links, + /* mark as MLO, but not assoc */ + IEEE80211_MLD_MAX_NUM_LINKS, + NULL, 0, info); if (err) return err; @@ -16529,7 +16538,7 @@ static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) goto out; } - err = cfg80211_assoc_ml_reconf(rdev, dev, links, rem_links); + err = -EOPNOTSUPP; out: for (link_id = 0; link_id < ARRAY_SIZE(links); link_id++) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2dd0533e7660..212e9561aae7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -407,7 +407,8 @@ static bool is_an_alpha2(const char *alpha2) { if (!alpha2) return false; - return isalpha(alpha2[0]) && isalpha(alpha2[1]); + return isascii(alpha2[0]) && isalpha(alpha2[0]) && + isascii(alpha2[1]) && isalpha(alpha2[1]); } static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 1f7975b49657..d158cb6dd391 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -105,7 +105,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, if (pool->unaligned) pool->free_heads[i] = xskb; else - xp_init_xskb_addr(xskb, pool, i * pool->chunk_size); + xp_init_xskb_addr(xskb, pool, (u64)i * pool->chunk_size); } return pool; diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 755f1eea8bfa..3b6d7284fc70 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -2625,12 +2625,10 @@ static void __iptfs_init_state(struct xfrm_state *x, struct xfrm_iptfs_data *xtfs) { __skb_queue_head_init(&xtfs->queue); - hrtimer_init(&xtfs->iptfs_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); - xtfs->iptfs_timer.function = iptfs_delay_timer; + hrtimer_setup(&xtfs->iptfs_timer, iptfs_delay_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); spin_lock_init(&xtfs->drop_lock); - hrtimer_init(&xtfs->drop_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); - xtfs->drop_timer.function = iptfs_drop_timer; + hrtimer_setup(&xtfs->drop_timer, iptfs_drop_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); /* Modify type (esp) adjustment values */ diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index f7abd42c077d..3cabc87978dd 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -612,6 +612,40 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output_resume); +static int xfrm_dev_direct_output(struct sock *sk, struct xfrm_state *x, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct net *net = xs_net(x); + int err; + + dst = skb_dst_pop(skb); + if (!dst) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + return -EHOSTUNREACH; + } + skb_dst_set(skb, dst); + nf_reset_ct(skb); + + err = skb_dst(skb)->ops->local_out(net, sk, skb); + if (unlikely(err != 1)) { + kfree_skb(skb); + return err; + } + + /* In transport mode, network destination is + * directly reachable, while in tunnel mode, + * inner packet network may not be. In packet + * offload type, HW is responsible for hard + * header packet mangling so directly xmit skb + * to netdevice. + */ + skb->dev = x->xso.dev; + __skb_push(skb, skb->dev->hard_header_len); + return dev_queue_xmit(skb); +} + static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { return xfrm_output_resume(sk, skb, 1); @@ -735,6 +769,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) return -EHOSTUNREACH; } + /* Exclusive direct xmit for tunnel mode, as + * some filtering or matching rules may apply + * in transport mode. + */ + if (x->props.mode == XFRM_MODE_TUNNEL) + return xfrm_dev_direct_output(sk, x, skb); + return xfrm_output_resume(sk, skb, 0); } @@ -758,7 +799,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) skb->encapsulation = 1; if (skb_is_gso(skb)) { - if (skb->inner_protocol) + if (skb->inner_protocol && x->props.mode == XFRM_MODE_TUNNEL) return xfrm_output_gso(net, sk, skb); skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ad2202fa82f3..9bd14fdb67a5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -746,8 +746,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); INIT_HLIST_NODE(&x->byseq); - hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT); - x->mtimer.function = xfrm_timer_handler; + hrtimer_setup(&x->mtimer, xfrm_timer_handler, CLOCK_BOOTTIME, + HRTIMER_MODE_ABS_SOFT); timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0); x->curlft.add_time = ktime_get_real_seconds(); x->lft.soft_byte_limit = XFRM_INF; |