summaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c485
1 files changed, 355 insertions, 130 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c925ac50b95..f1fe26f66458 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -94,6 +94,7 @@
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
+#include <linux/bpf.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
@@ -139,6 +140,7 @@
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/sctp.h>
+#include <linux/crash_dump.h>
#include "net-sysfs.h"
@@ -196,7 +198,7 @@ static inline void dev_base_seq_inc(struct net *net)
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
- unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+ unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}
@@ -1741,7 +1743,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
__net_timestamp(SKB); \
} \
-bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
+bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
{
unsigned int len;
@@ -1850,7 +1852,7 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
* taps currently in use.
*/
-static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
struct sk_buff *skb2 = NULL;
@@ -1907,6 +1909,7 @@ out_unlock:
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
/**
* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
@@ -2248,11 +2251,12 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues);
*/
int netif_get_num_default_rss_queues(void)
{
- return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+ return is_kdump_kernel() ?
+ 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);
-static inline void __netif_reschedule(struct Qdisc *q)
+static void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
unsigned long flags;
@@ -2419,7 +2423,7 @@ EXPORT_SYMBOL(__skb_tx_hash);
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
- static const netdev_features_t null_features = 0;
+ static const netdev_features_t null_features;
struct net_device *dev = skb->dev;
const char *name = "";
@@ -2711,6 +2715,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
return ERR_PTR(err);
}
+ /* Only report GSO partial support if it will enable us to
+ * support segmentation on this frame without needing additional
+ * work.
+ */
+ if (features & NETIF_F_GSO_PARTIAL) {
+ netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
+ struct net_device *dev = skb->dev;
+
+ partial_features |= dev->features & dev->gso_partial_features;
+ if (!skb_gso_ok(skb, features | partial_features))
+ features &= ~NETIF_F_GSO_PARTIAL;
+ }
+
BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
@@ -2825,14 +2842,45 @@ static netdev_features_t dflt_features_check(const struct sk_buff *skb,
return vlan_features_check(skb, features);
}
+static netdev_features_t gso_features_check(const struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ u16 gso_segs = skb_shinfo(skb)->gso_segs;
+
+ if (gso_segs > dev->gso_max_segs)
+ return features & ~NETIF_F_GSO_MASK;
+
+ /* Support for GSO partial features requires software
+ * intervention before we can actually process the packets
+ * so we need to strip support for any partial features now
+ * and we can pull them back in after we have partially
+ * segmented the frame.
+ */
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
+ features &= ~dev->gso_partial_features;
+
+ /* Make sure to clear the IPv4 ID mangling feature if the
+ * IPv4 header has the potential to be fragmented.
+ */
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+ struct iphdr *iph = skb->encapsulation ?
+ inner_ip_hdr(skb) : ip_hdr(skb);
+
+ if (!(iph->frag_off & htons(IP_DF)))
+ features &= ~NETIF_F_TSO_MANGLEID;
+ }
+
+ return features;
+}
+
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
netdev_features_t features = dev->features;
- u16 gso_segs = skb_shinfo(skb)->gso_segs;
- if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
- features &= ~NETIF_F_GSO_MASK;
+ if (skb_is_gso(skb))
+ features = gso_features_check(skb, dev, features);
/* If encapsulation offload request, verify we are testing
* hardware encapsulation features instead of standard
@@ -2915,9 +2963,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
{
netdev_features_t features;
- if (skb->next)
- return skb;
-
features = netif_skb_features(skb);
skb = validate_xmit_vlan(skb, features);
if (unlikely(!skb))
@@ -2960,6 +3005,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
out_kfree_skb:
kfree_skb(skb);
out_null:
+ atomic_long_inc(&dev->tx_dropped);
return NULL;
}
@@ -3025,6 +3071,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
+ struct sk_buff *to_free = NULL;
bool contended;
int rc;
@@ -3032,7 +3079,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
- * This permits __QDISC___STATE_RUNNING owner to get the lock more
+ * This permits qdisc->running owner to get the lock more
* often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
@@ -3041,7 +3088,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
spin_lock(root_lock);
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
- kfree_skb(skb);
+ __qdisc_drop(skb, &to_free);
rc = NET_XMIT_DROP;
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
qdisc_run_begin(q)) {
@@ -3064,7 +3111,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
rc = NET_XMIT_SUCCESS;
} else {
- rc = q->enqueue(skb, q) & NET_XMIT_MASK;
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -3074,6 +3121,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
}
}
spin_unlock(root_lock);
+ if (unlikely(to_free))
+ kfree_skb_list(to_free);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
@@ -3099,8 +3148,6 @@ static void skb_update_prio(struct sk_buff *skb)
DEFINE_PER_CPU(int, xmit_recursion);
EXPORT_SYMBOL(xmit_recursion);
-#define RECURSION_LIMIT 10
-
/**
* dev_loopback_xmit - loop back @skb
* @net: network namespace this loopback is happening in
@@ -3143,12 +3190,12 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
case TC_ACT_SHOT:
qdisc_qstats_cpu_drop(cl->q);
*ret = NET_XMIT_DROP;
- goto drop;
+ kfree_skb(skb);
+ return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
*ret = NET_XMIT_SUCCESS;
-drop:
- kfree_skb(skb);
+ consume_skb(skb);
return NULL;
case TC_ACT_REDIRECT:
/* No need to push/pop skb's mac_header here on egress! */
@@ -3308,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
else
skb_dst_force(skb);
-#ifdef CONFIG_NET_SWITCHDEV
- /* Don't forward if offload device already forwarded */
- if (skb->offload_fwd_mark &&
- skb->offload_fwd_mark == dev->offload_fwd_mark) {
- consume_skb(skb);
- rc = NET_XMIT_SUCCESS;
- goto out;
- }
-#endif
-
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
@@ -3343,13 +3380,13 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
int cpu = smp_processor_id(); /* ok because BHs are off */
if (txq->xmit_lock_owner != cpu) {
-
- if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+ if (unlikely(__this_cpu_read(xmit_recursion) >
+ XMIT_RECURSION_LIMIT))
goto recursion_alert;
skb = validate_xmit_skb(skb, dev);
if (!skb)
- goto drop;
+ goto out;
HARD_TX_LOCK(dev, txq, cpu);
@@ -3376,7 +3413,6 @@ recursion_alert:
}
rc = -ENETDOWN;
-drop:
rcu_read_unlock_bh();
atomic_long_inc(&dev->tx_dropped);
@@ -3428,6 +3464,7 @@ u32 rps_cpu_mask __read_mostly;
EXPORT_SYMBOL(rps_cpu_mask);
struct static_key rps_needed __read_mostly;
+EXPORT_SYMBOL(rps_needed);
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -3855,28 +3892,19 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (spin_trylock(root_lock)) {
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED,
- &q->state);
- qdisc_run(q);
- spin_unlock(root_lock);
- } else {
- if (!test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state)) {
- __netif_reschedule(q);
- } else {
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED,
- &q->state);
- }
- }
+ spin_lock(root_lock);
+ /* We need to make sure head->next_sched is read
+ * before clearing __QDISC_STATE_SCHED
+ */
+ smp_mb__before_atomic();
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
+ qdisc_run(q);
+ spin_unlock(root_lock);
}
}
}
-#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
- (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
+#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
/* This hook is defined here for ATM LANE */
int (*br_fdb_test_addr_hook)(struct net_device *dev,
unsigned char *addr) __read_mostly;
@@ -3914,9 +3942,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
break;
case TC_ACT_SHOT:
qdisc_qstats_cpu_drop(cl->q);
+ kfree_skb(skb);
+ return NULL;
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
- kfree_skb(skb);
+ consume_skb(skb);
return NULL;
case TC_ACT_REDIRECT:
/* skb_mac_header check was done by cls/act_bpf, so
@@ -3934,6 +3964,22 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
/**
+ * netdev_is_rx_handler_busy - check if receive handler is registered
+ * @dev: device to check
+ *
+ * Check if a receive handler is already registered for a given device.
+ * Return true if there one.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+bool netdev_is_rx_handler_busy(struct net_device *dev)
+{
+ ASSERT_RTNL();
+ return dev && rtnl_dereference(dev->rx_handler);
+}
+EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
+
+/**
* netdev_rx_handler_register - register receive handler
* @dev: device to register a handler for
* @rx_handler: receive handler to register
@@ -4009,12 +4055,17 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
{
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_hook_ingress_active(skb)) {
+ int ingress_retval;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
- return nf_hook_ingress(skb);
+ rcu_read_lock();
+ ingress_retval = nf_hook_ingress(skb);
+ rcu_read_unlock();
+ return ingress_retval;
}
#endif /* CONFIG_NETFILTER_INGRESS */
return 0;
@@ -4251,32 +4302,53 @@ int netif_receive_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_receive_skb);
-/* Network device is going away, flush any packets still pending
- * Called with irqs disabled.
- */
-static void flush_backlog(void *arg)
+DEFINE_PER_CPU(struct work_struct, flush_works);
+
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(struct work_struct *work)
{
- struct net_device *dev = arg;
- struct softnet_data *sd = this_cpu_ptr(&softnet_data);
struct sk_buff *skb, *tmp;
+ struct softnet_data *sd;
+
+ local_bh_disable();
+ sd = this_cpu_ptr(&softnet_data);
+ local_irq_disable();
rps_lock(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
- if (skb->dev == dev) {
+ if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
kfree_skb(skb);
input_queue_head_incr(sd);
}
}
rps_unlock(sd);
+ local_irq_enable();
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
- if (skb->dev == dev) {
+ if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
kfree_skb(skb);
input_queue_head_incr(sd);
}
}
+ local_bh_enable();
+}
+
+static void flush_all_backlogs(void)
+{
+ unsigned int cpu;
+
+ get_online_cpus();
+
+ for_each_online_cpu(cpu)
+ queue_work_on(cpu, system_highpri_wq,
+ per_cpu_ptr(&flush_works, cpu));
+
+ for_each_online_cpu(cpu)
+ flush_work(per_cpu_ptr(&flush_works, cpu));
+
+ put_online_cpus();
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -4440,6 +4512,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->encap_mark = 0;
NAPI_GRO_CB(skb)->is_fou = 0;
+ NAPI_GRO_CB(skb)->is_atomic = 1;
NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
@@ -4664,6 +4737,8 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
if (unlikely(skb_gro_header_hard(skb, hlen))) {
eth = skb_gro_header_slow(skb, hlen, 0);
if (unlikely(!eth)) {
+ net_warn_ratelimited("%s: dropping impossible skb from %s\n",
+ __func__, napi->dev->name);
napi_reuse_skb(napi, skb);
return NULL;
}
@@ -4761,8 +4836,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
static int process_backlog(struct napi_struct *napi, int quota)
{
- int work = 0;
struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+ bool again = true;
+ int work = 0;
/* Check if we have pending ipi, its better to send them now,
* not waiting net_rx_action() end.
@@ -4773,23 +4849,20 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
napi->weight = weight_p;
- local_irq_disable();
- while (1) {
+ while (again) {
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sd->process_queue))) {
rcu_read_lock();
- local_irq_enable();
__netif_receive_skb(skb);
rcu_read_unlock();
- local_irq_disable();
input_queue_head_incr(sd);
- if (++work >= quota) {
- local_irq_enable();
+ if (++work >= quota)
return work;
- }
+
}
+ local_irq_disable();
rps_lock(sd);
if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
@@ -4801,16 +4874,14 @@ static int process_backlog(struct napi_struct *napi, int quota)
* and we dont need an smp_mb() memory barrier.
*/
napi->state = 0;
- rps_unlock(sd);
-
- break;
+ again = false;
+ } else {
+ skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ &sd->process_queue);
}
-
- skb_queue_splice_tail_init(&sd->input_pkt_queue,
- &sd->process_queue);
rps_unlock(sd);
+ local_irq_enable();
}
- local_irq_enable();
return work;
}
@@ -4929,7 +5000,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
rc = napi->poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi);
+ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
if (rc == BUSY_POLL_BUDGET) {
napi_complete_done(napi, rc);
napi_schedule(napi);
@@ -4938,8 +5009,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
netpoll_poll_unlock(have);
}
if (rc > 0)
- NET_ADD_STATS_BH(sock_net(sk),
- LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ __NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_BUSYPOLLRXPACKETS, rc);
local_bh_enable();
if (rc == LL_FLUSH_FAILED)
@@ -5085,7 +5156,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
- trace_napi_poll(n);
+ trace_napi_poll(n, work, weight);
}
WARN_ON_ONCE(work > weight);
@@ -5402,6 +5473,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
EXPORT_SYMBOL(netdev_lower_get_next);
/**
+ * netdev_all_lower_get_next - Get the next device from all lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour all lower
+ * list will remain unchanged.
+ */
+struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry(*iter, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->all_adj_list.lower)
+ return NULL;
+
+ *iter = lower->list.next;
+
+ return lower->dev;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next);
+
+/**
+ * netdev_all_lower_get_next_rcu - Get the next device from all
+ * lower neighbour list, RCU variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
+ struct netdev_adjacent, list);
+
+ return lower ? lower->dev : NULL;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
+/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
* variant
@@ -5472,6 +5589,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct net_device *adj_dev,
+ u16 ref_nr,
struct list_head *dev_list,
void *private, bool master)
{
@@ -5481,7 +5599,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj = __netdev_find_adj(adj_dev, dev_list);
if (adj) {
- adj->ref_nr++;
+ adj->ref_nr += ref_nr;
return 0;
}
@@ -5491,7 +5609,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->dev = adj_dev;
adj->master = master;
- adj->ref_nr = 1;
+ adj->ref_nr = ref_nr;
adj->private = private;
dev_hold(adj_dev);
@@ -5530,6 +5648,7 @@ free_adj:
static void __netdev_adjacent_dev_remove(struct net_device *dev,
struct net_device *adj_dev,
+ u16 ref_nr,
struct list_head *dev_list)
{
struct netdev_adjacent *adj;
@@ -5542,10 +5661,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
BUG();
}
- if (adj->ref_nr > 1) {
- pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
- adj->ref_nr-1);
- adj->ref_nr--;
+ if (adj->ref_nr > ref_nr) {
+ pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name,
+ ref_nr, adj->ref_nr-ref_nr);
+ adj->ref_nr -= ref_nr;
return;
}
@@ -5564,21 +5683,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
struct net_device *upper_dev,
+ u16 ref_nr,
struct list_head *up_list,
struct list_head *down_list,
void *private, bool master)
{
int ret;
- ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
- master);
+ ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list,
+ private, master);
if (ret)
return ret;
- ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
- false);
+ ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list,
+ private, false);
if (ret) {
- __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+ __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
return ret;
}
@@ -5586,9 +5706,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
}
static int __netdev_adjacent_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ u16 ref_nr)
{
- return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr,
&dev->all_adj_list.upper,
&upper_dev->all_adj_list.lower,
NULL, false);
@@ -5596,17 +5717,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev,
static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
struct net_device *upper_dev,
+ u16 ref_nr,
struct list_head *up_list,
struct list_head *down_list)
{
- __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
- __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
+ __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
+ __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
}
static void __netdev_adjacent_dev_unlink(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ u16 ref_nr)
{
- __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
&dev->all_adj_list.upper,
&upper_dev->all_adj_list.lower);
}
@@ -5615,17 +5738,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
struct net_device *upper_dev,
void *private, bool master)
{
- int ret = __netdev_adjacent_dev_link(dev, upper_dev);
+ int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1);
if (ret)
return ret;
- ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1,
&dev->adj_list.upper,
&upper_dev->adj_list.lower,
private, master);
if (ret) {
- __netdev_adjacent_dev_unlink(dev, upper_dev);
+ __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
return ret;
}
@@ -5635,8 +5758,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
struct net_device *upper_dev)
{
- __netdev_adjacent_dev_unlink(dev, upper_dev);
- __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
&dev->adj_list.upper,
&upper_dev->adj_list.lower);
}
@@ -5689,7 +5812,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
pr_debug("Interlinking %s with %s, non-neighbour\n",
i->dev->name, j->dev->name);
- ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+ ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
if (ret)
goto rollback_mesh;
}
@@ -5699,7 +5822,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
pr_debug("linking %s's upper device %s with %s\n",
upper_dev->name, i->dev->name, dev->name);
- ret = __netdev_adjacent_dev_link(dev, i->dev);
+ ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
if (ret)
goto rollback_upper_mesh;
}
@@ -5708,7 +5831,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
list_for_each_entry(i, &dev->all_adj_list.lower, list) {
pr_debug("linking %s's lower device %s with %s\n", dev->name,
i->dev->name, upper_dev->name);
- ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+ ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
if (ret)
goto rollback_lower_mesh;
}
@@ -5726,7 +5849,7 @@ rollback_lower_mesh:
list_for_each_entry(i, &dev->all_adj_list.lower, list) {
if (i == to_i)
break;
- __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
}
i = NULL;
@@ -5736,7 +5859,7 @@ rollback_upper_mesh:
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
if (i == to_i)
break;
- __netdev_adjacent_dev_unlink(dev, i->dev);
+ __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
}
i = j = NULL;
@@ -5748,7 +5871,7 @@ rollback_mesh:
list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
if (i == to_i && j == to_j)
break;
- __netdev_adjacent_dev_unlink(i->dev, j->dev);
+ __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
}
if (i == to_i)
break;
@@ -5828,16 +5951,16 @@ void netdev_upper_dev_unlink(struct net_device *dev,
*/
list_for_each_entry(i, &dev->all_adj_list.lower, list)
list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(i->dev, j->dev);
+ __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
/* remove also the devices itself from lower/upper device
* list
*/
list_for_each_entry(i, &dev->all_adj_list.lower, list)
- __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(dev, i->dev);
+ __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
&changeupper_info.info);
@@ -5871,7 +5994,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_add(iter->dev, dev,
&iter->dev->adj_list.lower);
@@ -5880,7 +6003,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_add(iter->dev, dev,
&iter->dev->adj_list.upper);
@@ -5896,7 +6019,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, dev->name,
&iter->dev->adj_list.lower);
@@ -5905,7 +6028,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, dev->name,
&iter->dev->adj_list.upper);
@@ -5921,7 +6044,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.lower);
@@ -5930,7 +6053,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.upper);
@@ -5955,8 +6078,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
EXPORT_SYMBOL(netdev_lower_dev_get_private);
-int dev_get_nest_level(struct net_device *dev,
- bool (*type_check)(const struct net_device *dev))
+int dev_get_nest_level(struct net_device *dev)
{
struct net_device *lower = NULL;
struct list_head *iter;
@@ -5966,15 +6088,12 @@ int dev_get_nest_level(struct net_device *dev,
ASSERT_RTNL();
netdev_for_each_lower_dev(dev, lower, iter) {
- nest = dev_get_nest_level(lower, type_check);
+ nest = dev_get_nest_level(lower);
if (max_nest < nest)
max_nest = nest;
}
- if (type_check(dev))
- max_nest++;
-
- return max_nest;
+ return max_nest + 1;
}
EXPORT_SYMBOL(dev_get_nest_level);
@@ -5998,6 +6117,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
}
EXPORT_SYMBOL(netdev_lower_state_changed);
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+ struct neighbour *n)
+{
+ struct net_device *lower_dev, *stop_dev;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (!lower_dev->netdev_ops->ndo_neigh_construct)
+ continue;
+ err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
+ if (err) {
+ stop_dev = lower_dev;
+ goto rollback;
+ }
+ }
+ return 0;
+
+rollback:
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (lower_dev == stop_dev)
+ break;
+ if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+ continue;
+ lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
+
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+ struct neighbour *n)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+ continue;
+ lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+ }
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -6482,6 +6645,38 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
EXPORT_SYMBOL(dev_change_proto_down);
/**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @fd: new program fd or negative value to clear
+ *
+ * Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, int fd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct bpf_prog *prog = NULL;
+ struct netdev_xdp xdp = {};
+ int err;
+
+ if (!ops->ndo_xdp)
+ return -EOPNOTSUPP;
+ if (fd >= 0) {
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+
+ xdp.command = XDP_SETUP_PROG;
+ xdp.prog = prog;
+ err = ops->ndo_xdp(dev, &xdp);
+ if (err < 0 && prog)
+ bpf_prog_put(prog);
+
+ return err;
+}
+EXPORT_SYMBOL(dev_change_xdp_fd);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
@@ -6545,8 +6740,8 @@ static void rollback_registered_many(struct list_head *head)
unlist_netdevice(dev);
dev->reg_state = NETREG_UNREGISTERING;
- on_each_cpu(flush_backlog, dev, 1);
}
+ flush_all_backlogs();
synchronize_net();
@@ -6676,6 +6871,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~NETIF_F_TSO6;
}
+ /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */
+ if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
+ features &= ~NETIF_F_TSO_MANGLEID;
+
/* TSO ECN requires that TSO is present as well. */
if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
features &= ~NETIF_F_TSO_ECN;
@@ -6704,6 +6903,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ /* GSO partial features require GSO partial be set */
+ if ((features & dev->gso_partial_features) &&
+ !(features & NETIF_F_GSO_PARTIAL)) {
+ netdev_dbg(dev,
+ "Dropping partially supported GSO features since no GSO partial.\n");
+ features &= ~dev->gso_partial_features;
+ }
+
#ifdef CONFIG_NET_RX_BUSY_POLL
if (dev->netdev_ops->ndo_busy_poll)
features |= NETIF_F_BUSY_POLL;
@@ -6974,9 +7181,22 @@ int register_netdevice(struct net_device *dev)
dev->features |= NETIF_F_SOFT_FEATURES;
dev->wanted_features = dev->features & dev->hw_features;
- if (!(dev->flags & IFF_LOOPBACK)) {
+ if (!(dev->flags & IFF_LOOPBACK))
dev->hw_features |= NETIF_F_NOCACHE_COPY;
- }
+
+ /* If IPv4 TCP segmentation offload is supported we should also
+ * allow the device to enable segmenting the frame with the option
+ * of ignoring a static IP ID value. This doesn't enable the
+ * feature itself but allows the user to enable it later.
+ */
+ if (dev->hw_features & NETIF_F_TSO)
+ dev->hw_features |= NETIF_F_TSO_MANGLEID;
+ if (dev->vlan_features & NETIF_F_TSO)
+ dev->vlan_features |= NETIF_F_TSO_MANGLEID;
+ if (dev->mpls_features & NETIF_F_TSO)
+ dev->mpls_features |= NETIF_F_TSO_MANGLEID;
+ if (dev->hw_enc_features & NETIF_F_TSO)
+ dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
*/
@@ -6984,7 +7204,7 @@ int register_netdevice(struct net_device *dev)
/* Make NETIF_F_SG inheritable to tunnel devices.
*/
- dev->hw_enc_features |= NETIF_F_SG;
+ dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
/* Make NETIF_F_SG inheritable to MPLS.
*/
@@ -7427,7 +7647,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
- dev->gso_min_segs = 0;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
@@ -7439,6 +7658,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->all_adj_list.lower);
INIT_LIST_HEAD(&dev->ptype_all);
INIT_LIST_HEAD(&dev->ptype_specific);
+#ifdef CONFIG_NET_SCHED
+ hash_init(dev->qdisc_hash);
+#endif
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -8084,8 +8306,11 @@ static int __init net_dev_init(void)
*/
for_each_possible_cpu(i) {
+ struct work_struct *flush = per_cpu_ptr(&flush_works, i);
struct softnet_data *sd = &per_cpu(softnet_data, i);
+ INIT_WORK(flush, flush_backlog);
+
skb_queue_head_init(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
INIT_LIST_HEAD(&sd->poll_list);