Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  485
1 file changed, 355 insertions(+), 130 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c925ac50b95..f1fe26f66458 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -94,6 +94,7 @@
 #include <linux/ethtool.h>
 #include <linux/notifier.h>
 #include <linux/skbuff.h>
+#include <linux/bpf.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/busy_poll.h>
@@ -139,6 +140,7 @@
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
 #include <linux/sctp.h>
+#include <linux/crash_dump.h>
 
 #include "net-sysfs.h"
 
@@ -196,7 +198,7 @@ static inline void dev_base_seq_inc(struct net *net)
 
 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 {
-    unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+    unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
 
     return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 }
@@ -1741,7 +1743,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
             __net_timestamp(SKB);            \
     }                        \
 
-bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
+bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
 {
     unsigned int len;
 
@@ -1850,7 +1852,7 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
  *    taps currently in use.
  */
 
-static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
     struct packet_type *ptype;
     struct sk_buff *skb2 = NULL;
@@ -1907,6 +1909,7 @@ out_unlock:
         pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
     rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
 
 /**
  * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
@@ -2248,11 +2251,12 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues);
  */
 int netif_get_num_default_rss_queues(void)
 {
-    return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+    return is_kdump_kernel() ?
+        1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
 }
 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
 
-static inline void __netif_reschedule(struct Qdisc *q)
+static void __netif_reschedule(struct Qdisc *q)
 {
     struct softnet_data *sd;
     unsigned long flags;
@@ -2419,7 +2423,7 @@ EXPORT_SYMBOL(__skb_tx_hash);
 
 static void skb_warn_bad_offload(const struct sk_buff *skb)
 {
-    static const netdev_features_t null_features = 0;
+    static const netdev_features_t null_features;
     struct net_device *dev = skb->dev;
     const char *name = "";
 
@@ -2711,6 +2715,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
             return ERR_PTR(err);
     }
 
+    /* Only report GSO partial support if it will enable us to
+     * support segmentation on this frame without needing additional
+     * work.
+     */
+    if (features & NETIF_F_GSO_PARTIAL) {
+        netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
+        struct net_device *dev = skb->dev;
+
+        partial_features |= dev->features & dev->gso_partial_features;
+        if (!skb_gso_ok(skb, features | partial_features))
+            features &= ~NETIF_F_GSO_PARTIAL;
+    }
+
     BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
              sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
 
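The hunk above only advertises NETIF_F_GSO_PARTIAL when the device's gso_partial_features, combined with what is already offered, cover every segmentation step the frame still needs. A minimal standalone model of that mask arithmetic, with made-up feature bits and a simplified skb_gso_ok() stand-in (none of these names are the kernel's):

```c
#include <stdint.h>
#include <stdio.h>

#define F_GSO_PARTIAL    (1ULL << 0)   /* illustrative feature bits */
#define F_GSO_UDP_TUNNEL (1ULL << 1)
#define F_TSO            (1ULL << 2)

/* skb_gso_ok() analogue: every segmentation type the packet needs
 * must be covered by the advertised feature mask. */
static int gso_ok(uint64_t needed, uint64_t features)
{
    return (needed & ~features) == 0;
}

static uint64_t adjust_partial(uint64_t needed, uint64_t features,
                               uint64_t dev_partial_features)
{
    if (features & F_GSO_PARTIAL) {
        uint64_t partial = features | dev_partial_features;

        if (!gso_ok(needed, partial))
            features &= ~F_GSO_PARTIAL;  /* fall back to full software GSO */
    }
    return features;
}

int main(void)
{
    /* frame needs TSO plus UDP tunnel; only the partial set covers tunnels */
    uint64_t f = adjust_partial(F_TSO | F_GSO_UDP_TUNNEL,
                                F_TSO | F_GSO_PARTIAL,
                                F_GSO_UDP_TUNNEL);
    printf("GSO partial kept: %s\n", (f & F_GSO_PARTIAL) ? "yes" : "no");
    return 0;
}
```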
@@ -2825,14 +2842,45 @@ static netdev_features_t dflt_features_check(const struct sk_buff *skb,
     return vlan_features_check(skb, features);
 }
 
+static netdev_features_t gso_features_check(const struct sk_buff *skb,
+                                            struct net_device *dev,
+                                            netdev_features_t features)
+{
+    u16 gso_segs = skb_shinfo(skb)->gso_segs;
+
+    if (gso_segs > dev->gso_max_segs)
+        return features & ~NETIF_F_GSO_MASK;
+
+    /* Support for GSO partial features requires software
+     * intervention before we can actually process the packets
+     * so we need to strip support for any partial features now
+     * and we can pull them back in after we have partially
+     * segmented the frame.
+     */
+    if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
+        features &= ~dev->gso_partial_features;
+
+    /* Make sure to clear the IPv4 ID mangling feature if the
+     * IPv4 header has the potential to be fragmented.
+     */
+    if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+        struct iphdr *iph = skb->encapsulation ?
+                            inner_ip_hdr(skb) : ip_hdr(skb);
+
+        if (!(iph->frag_off & htons(IP_DF)))
+            features &= ~NETIF_F_TSO_MANGLEID;
+    }
+
+    return features;
+}
+
 netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
     struct net_device *dev = skb->dev;
     netdev_features_t features = dev->features;
-    u16 gso_segs = skb_shinfo(skb)->gso_segs;
 
-    if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
-        features &= ~NETIF_F_GSO_MASK;
+    if (skb_is_gso(skb))
+        features = gso_features_check(skb, dev, features);
 
     /* If encapsulation offload request, verify we are testing
      * hardware encapsulation features instead of standard
@@ -2915,9 +2963,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 {
     netdev_features_t features;
 
-    if (skb->next)
-        return skb;
-
     features = netif_skb_features(skb);
     skb = validate_xmit_vlan(skb, features);
     if (unlikely(!skb))
@@ -2960,6 +3005,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 out_kfree_skb:
     kfree_skb(skb);
 out_null:
+    atomic_long_inc(&dev->tx_dropped);
     return NULL;
 }
 
@@ -3025,6 +3071,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                  struct netdev_queue *txq)
 {
     spinlock_t *root_lock = qdisc_lock(q);
+    struct sk_buff *to_free = NULL;
     bool contended;
     int rc;
 
@@ -3032,7 +3079,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
     /*
      * Heuristic to force contended enqueues to serialize on a
      * separate lock before trying to get qdisc main lock.
-     * This permits __QDISC___STATE_RUNNING owner to get the lock more
+     * This permits qdisc->running owner to get the lock more
      * often and dequeue packets faster.
      */
     contended = qdisc_is_running(q);
@@ -3041,7 +3088,7 @@
     spin_lock(root_lock);
     if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-        kfree_skb(skb);
+        __qdisc_drop(skb, &to_free);
         rc = NET_XMIT_DROP;
     } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
            qdisc_run_begin(q)) {
@@ -3064,7 +3111,7 @@
         rc = NET_XMIT_SUCCESS;
     } else {
-        rc = q->enqueue(skb, q) & NET_XMIT_MASK;
+        rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
         if (qdisc_run_begin(q)) {
             if (unlikely(contended)) {
                 spin_unlock(&q->busylock);
@@ -3074,6 +3121,8 @@
         }
     }
     spin_unlock(root_lock);
+    if (unlikely(to_free))
+        kfree_skb_list(to_free);
     if (unlikely(contended))
         spin_unlock(&q->busylock);
     return rc;
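The to_free changes above move the actual freeing outside the qdisc root lock: paths that drop packets while the lock is held only chain them onto a local list, and kfree_skb_list() runs after the unlock. A small userspace sketch of the same deferral pattern, with a pthread mutex standing in for the qdisc lock (all names illustrative):

```c
#include <pthread.h>
#include <stdlib.h>

struct pkt { struct pkt *next; };

static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;

/* __qdisc_drop() analogue: just chain the packet, don't free it yet */
static void drop(struct pkt *p, struct pkt **to_free)
{
    p->next = *to_free;
    *to_free = p;
}

static void enqueue_or_drop(struct pkt *p, int queue_full)
{
    struct pkt *to_free = NULL;

    pthread_mutex_lock(&qlock);
    if (queue_full)
        drop(p, &to_free);          /* cheap while the lock is held */
    /* else: enqueue p ... */
    pthread_mutex_unlock(&qlock);

    while (to_free) {               /* expensive work after unlock */
        struct pkt *n = to_free->next;
        free(to_free);
        to_free = n;
    }
}

int main(void)
{
    enqueue_or_drop(malloc(sizeof(struct pkt)), 1);
    return 0;
}
```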
@@ -3099,8 +3148,6 @@ static void skb_update_prio(struct sk_buff *skb)
 DEFINE_PER_CPU(int, xmit_recursion);
 EXPORT_SYMBOL(xmit_recursion);
 
-#define RECURSION_LIMIT 10
-
 /**
  * dev_loopback_xmit - loop back @skb
  * @net: network namespace this loopback is happening in
@@ -3143,12 +3190,12 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
     case TC_ACT_SHOT:
         qdisc_qstats_cpu_drop(cl->q);
         *ret = NET_XMIT_DROP;
-        goto drop;
+        kfree_skb(skb);
+        return NULL;
     case TC_ACT_STOLEN:
     case TC_ACT_QUEUED:
         *ret = NET_XMIT_SUCCESS;
-drop:
-        kfree_skb(skb);
+        consume_skb(skb);
         return NULL;
     case TC_ACT_REDIRECT:
         /* No need to push/pop skb's mac_header here on egress! */
@@ -3308,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
     else
         skb_dst_force(skb);
 
-#ifdef CONFIG_NET_SWITCHDEV
-    /* Don't forward if offload device already forwarded */
-    if (skb->offload_fwd_mark &&
-        skb->offload_fwd_mark == dev->offload_fwd_mark) {
-        consume_skb(skb);
-        rc = NET_XMIT_SUCCESS;
-        goto out;
-    }
-#endif
-
     txq = netdev_pick_tx(dev, skb, accel_priv);
     q = rcu_dereference_bh(txq->qdisc);
 
@@ -3343,13 +3380,13 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
         int cpu = smp_processor_id(); /* ok because BHs are off */
 
         if (txq->xmit_lock_owner != cpu) {
-
-            if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+            if (unlikely(__this_cpu_read(xmit_recursion) >
+                     XMIT_RECURSION_LIMIT))
                 goto recursion_alert;
 
             skb = validate_xmit_skb(skb, dev);
             if (!skb)
-                goto drop;
+                goto out;
 
             HARD_TX_LOCK(dev, txq, cpu);
@@ -3376,7 +3413,6 @@ recursion_alert:
     }
 
     rc = -ENETDOWN;
-drop:
     rcu_read_unlock_bh();
 
     atomic_long_inc(&dev->tx_dropped);
@@ -3428,6 +3464,7 @@ u32 rps_cpu_mask __read_mostly;
 EXPORT_SYMBOL(rps_cpu_mask);
 
 struct static_key rps_needed __read_mostly;
+EXPORT_SYMBOL(rps_needed);
 
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -3855,28 +3892,19 @@ static void net_tx_action(struct softirq_action *h)
             head = head->next_sched;
 
             root_lock = qdisc_lock(q);
-            if (spin_trylock(root_lock)) {
-                smp_mb__before_atomic();
-                clear_bit(__QDISC_STATE_SCHED,
-                      &q->state);
-                qdisc_run(q);
-                spin_unlock(root_lock);
-            } else {
-                if (!test_bit(__QDISC_STATE_DEACTIVATED,
-                          &q->state)) {
-                    __netif_reschedule(q);
-                } else {
-                    smp_mb__before_atomic();
-                    clear_bit(__QDISC_STATE_SCHED,
-                          &q->state);
-                }
-            }
+            spin_lock(root_lock);
+            /* We need to make sure head->next_sched is read
+             * before clearing __QDISC_STATE_SCHED
+             */
+            smp_mb__before_atomic();
+            clear_bit(__QDISC_STATE_SCHED, &q->state);
+            qdisc_run(q);
+            spin_unlock(root_lock);
         }
     }
 }
 
-#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
-    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
+#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
                  unsigned char *addr) __read_mostly;
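XMIT_RECURSION_LIMIT above replaces the local RECURSION_LIMIT define, but the guard itself is unchanged: a per-CPU depth counter refuses to re-enter the transmit path indefinitely (as happens with badly stacked virtual devices). A thread-local model of that guard; the constant and names are stand-ins, not the kernel's:

```c
#include <stdio.h>

#define RECURSION_LIMIT 10   /* illustrative; the kernel's limit differs per tree */

static _Thread_local int xmit_recursion;

static int dev_xmit(int nested_devices)
{
    int rc;

    if (xmit_recursion > RECURSION_LIMIT) {
        fprintf(stderr, "recursion alert: dropping packet\n");
        return -1;
    }
    xmit_recursion++;          /* entering the transmit path */
    rc = nested_devices > 0 ? dev_xmit(nested_devices - 1) : 0;
    xmit_recursion--;          /* leaving it */
    return rc;
}

int main(void)
{
    printf("shallow stack: %d\n", dev_xmit(3));   /* succeeds */
    printf("deep stack:    %d\n", dev_xmit(50));  /* trips the limit */
    return 0;
}
```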
@@ -3914,9 +3942,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
         break;
     case TC_ACT_SHOT:
         qdisc_qstats_cpu_drop(cl->q);
+        kfree_skb(skb);
+        return NULL;
     case TC_ACT_STOLEN:
     case TC_ACT_QUEUED:
-        kfree_skb(skb);
+        consume_skb(skb);
         return NULL;
     case TC_ACT_REDIRECT:
         /* skb_mac_header check was done by cls/act_bpf, so
@@ -3934,6 +3964,22 @@
 }
 
 /**
+ * netdev_is_rx_handler_busy - check if receive handler is registered
+ * @dev: device to check
+ *
+ * Check if a receive handler is already registered for a given device.
+ * Return true if there is one.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+bool netdev_is_rx_handler_busy(struct net_device *dev)
+{
+    ASSERT_RTNL();
+    return dev && rtnl_dereference(dev->rx_handler);
+}
+EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
+
+/**
  * netdev_rx_handler_register - register receive handler
  * @dev: device to register a handler for
  * @rx_handler: receive handler to register
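netdev_is_rx_handler_busy() above simply reports whether a device's single rx_handler slot is already taken, so callers can refuse a second registration. A self-contained model of that contract, with locking and RCU elided (purely illustrative, not the kernel's types):

```c
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

typedef int (*rx_handler_t)(void *skb);

struct net_device { rx_handler_t rx_handler; };

static int rx_handler_busy(const struct net_device *dev)
{
    return dev && dev->rx_handler != NULL;
}

static int rx_handler_register(struct net_device *dev, rx_handler_t h)
{
    if (rx_handler_busy(dev))
        return -EBUSY;          /* same contract as the kernel helper */
    dev->rx_handler = h;
    return 0;
}

static int my_handler(void *skb) { (void)skb; return 0; }

int main(void)
{
    struct net_device dev = { 0 };

    printf("first:  %d\n", rx_handler_register(&dev, my_handler)); /* 0 */
    printf("second: %d\n", rx_handler_register(&dev, my_handler)); /* -EBUSY */
    return 0;
}
```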
@@ -4009,12 +4055,17 @@
 {
 #ifdef CONFIG_NETFILTER_INGRESS
     if (nf_hook_ingress_active(skb)) {
+        int ingress_retval;
+
         if (*pt_prev) {
             *ret = deliver_skb(skb, *pt_prev, orig_dev);
             *pt_prev = NULL;
         }
 
-        return nf_hook_ingress(skb);
+        rcu_read_lock();
+        ingress_retval = nf_hook_ingress(skb);
+        rcu_read_unlock();
+        return ingress_retval;
     }
 #endif /* CONFIG_NETFILTER_INGRESS */
     return 0;
 }
@@ -4251,32 +4302,53 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending
- * Called with irqs disabled.
- */
-static void flush_backlog(void *arg)
+DEFINE_PER_CPU(struct work_struct, flush_works);
+
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(struct work_struct *work)
 {
-    struct net_device *dev = arg;
-    struct softnet_data *sd = this_cpu_ptr(&softnet_data);
     struct sk_buff *skb, *tmp;
+    struct softnet_data *sd;
+
+    local_bh_disable();
+    sd = this_cpu_ptr(&softnet_data);
 
+    local_irq_disable();
     rps_lock(sd);
     skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
-        if (skb->dev == dev) {
+        if (skb->dev->reg_state == NETREG_UNREGISTERING) {
             __skb_unlink(skb, &sd->input_pkt_queue);
             kfree_skb(skb);
             input_queue_head_incr(sd);
         }
     }
     rps_unlock(sd);
+    local_irq_enable();
 
     skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
-        if (skb->dev == dev) {
+        if (skb->dev->reg_state == NETREG_UNREGISTERING) {
             __skb_unlink(skb, &sd->process_queue);
             kfree_skb(skb);
             input_queue_head_incr(sd);
         }
     }
+    local_bh_enable();
+}
+
+static void flush_all_backlogs(void)
+{
+    unsigned int cpu;
+
+    get_online_cpus();
+
+    for_each_online_cpu(cpu)
+        queue_work_on(cpu, system_highpri_wq,
+                  per_cpu_ptr(&flush_works, cpu));
+
+    for_each_online_cpu(cpu)
+        flush_work(per_cpu_ptr(&flush_works, cpu));
+
+    put_online_cpus();
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
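flush_all_backlogs() above replaces the IPI-based on_each_cpu() call: one work item per CPU is queued and then waited on, so each backlog is flushed in process context rather than with interrupts disabled. A rough analogue using one thread per "CPU", with pthreads standing in for the workqueue (illustrative only):

```c
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS 4

static void *flush_backlog(void *arg)
{
    printf("flushing backlog on cpu %ld\n", (long)arg);
    return NULL;
}

int main(void)
{
    pthread_t work[NR_CPUS];
    long cpu;

    for (cpu = 0; cpu < NR_CPUS; cpu++)   /* queue_work_on() analogue */
        pthread_create(&work[cpu], NULL, flush_backlog, (void *)cpu);

    for (cpu = 0; cpu < NR_CPUS; cpu++)   /* flush_work() analogue */
        pthread_join(work[cpu], NULL);
    return 0;
}
```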
@@ -4440,6 +4512,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
     NAPI_GRO_CB(skb)->free = 0;
     NAPI_GRO_CB(skb)->encap_mark = 0;
     NAPI_GRO_CB(skb)->is_fou = 0;
+    NAPI_GRO_CB(skb)->is_atomic = 1;
     NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
 
     /* Setup for GRO checksum validation */
@@ -4664,6 +4737,8 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
     if (unlikely(skb_gro_header_hard(skb, hlen))) {
         eth = skb_gro_header_slow(skb, hlen, 0);
         if (unlikely(!eth)) {
+            net_warn_ratelimited("%s: dropping impossible skb from %s\n",
+                         __func__, napi->dev->name);
             napi_reuse_skb(napi, skb);
             return NULL;
         }
@@ -4761,8 +4836,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
 
 static int process_backlog(struct napi_struct *napi, int quota)
 {
-    int work = 0;
     struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+    bool again = true;
+    int work = 0;
 
     /* Check if we have pending ipi, it's better to send them now,
      * not waiting net_rx_action() end.
@@ -4773,23 +4849,20 @@
     }
 
     napi->weight = weight_p;
-    local_irq_disable();
-    while (1) {
+    while (again) {
         struct sk_buff *skb;
 
         while ((skb = __skb_dequeue(&sd->process_queue))) {
             rcu_read_lock();
-            local_irq_enable();
             __netif_receive_skb(skb);
             rcu_read_unlock();
-            local_irq_disable();
             input_queue_head_incr(sd);
-            if (++work >= quota) {
-                local_irq_enable();
+            if (++work >= quota)
                 return work;
-            }
+
         }
 
+        local_irq_disable();
         rps_lock(sd);
         if (skb_queue_empty(&sd->input_pkt_queue)) {
             /*
@@ -4801,16 +4874,14 @@
              * and we don't need an smp_mb() memory barrier.
              */
             napi->state = 0;
-            rps_unlock(sd);
-
-            break;
+            again = false;
+        } else {
+            skb_queue_splice_tail_init(&sd->input_pkt_queue,
+                           &sd->process_queue);
         }
-
-        skb_queue_splice_tail_init(&sd->input_pkt_queue,
-                       &sd->process_queue);
         rps_unlock(sd);
+        local_irq_enable();
     }
-    local_irq_enable();
 
     return work;
 }
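The process_backlog() rework above keeps interrupts enabled while delivering packets: the shared input_pkt_queue is spliced onto a private process_queue under the lock, and that private queue is then drained without it. A compact userspace model of the splice-and-drain loop (all names are stand-ins):

```c
#include <pthread.h>
#include <stdio.h>

struct q { int pkts[64]; int len; };

static pthread_mutex_t rps_lock = PTHREAD_MUTEX_INITIALIZER;
static struct q input_pkt_queue;   /* producers append here under the lock */
static struct q process_queue;     /* drained without the lock */

static void process_backlog(void)
{
    int again = 1;

    while (again) {
        /* deliver everything already spliced, lock not held */
        for (int i = 0; i < process_queue.len; i++)
            printf("deliver packet %d\n", process_queue.pkts[i]);
        process_queue.len = 0;

        pthread_mutex_lock(&rps_lock);
        if (input_pkt_queue.len == 0) {
            again = 0;                       /* input drained: stop */
        } else {                             /* splice_tail_init() analogue */
            process_queue = input_pkt_queue;
            input_pkt_queue.len = 0;
        }
        pthread_mutex_unlock(&rps_lock);
    }
}

int main(void)
{
    input_pkt_queue.pkts[input_pkt_queue.len++] = 1;
    input_pkt_queue.pkts[input_pkt_queue.len++] = 2;
    process_backlog();
    return 0;
}
```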
@@ -4929,7 +5000,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
 
         if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
             rc = napi->poll(napi, BUSY_POLL_BUDGET);
-            trace_napi_poll(napi);
+            trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
             if (rc == BUSY_POLL_BUDGET) {
                 napi_complete_done(napi, rc);
                 napi_schedule(napi);
@@ -4938,8 +5009,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
         netpoll_poll_unlock(have);
     }
     if (rc > 0)
-        NET_ADD_STATS_BH(sock_net(sk),
-                 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+        __NET_ADD_STATS(sock_net(sk),
+                LINUX_MIB_BUSYPOLLRXPACKETS, rc);
     local_bh_enable();
 
     if (rc == LL_FLUSH_FAILED)
@@ -5085,7 +5156,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
     work = 0;
     if (test_bit(NAPI_STATE_SCHED, &n->state)) {
         work = n->poll(n, weight);
-        trace_napi_poll(n);
+        trace_napi_poll(n, work, weight);
     }
 
     WARN_ON_ONCE(work > weight);
@@ -5402,6 +5473,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
 EXPORT_SYMBOL(netdev_lower_get_next);
 
 /**
+ * netdev_all_lower_get_next - Get the next device from all lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour all lower
+ * list will remain unchanged.
+ */
+struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+    struct netdev_adjacent *lower;
+
+    lower = list_entry(*iter, struct netdev_adjacent, list);
+
+    if (&lower->list == &dev->all_adj_list.lower)
+        return NULL;
+
+    *iter = lower->list.next;
+
+    return lower->dev;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next);
+
+/**
+ * netdev_all_lower_get_next_rcu - Get the next device from all
+ *                 lower neighbour list, RCU variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+                         struct list_head **iter)
+{
+    struct netdev_adjacent *lower;
+
+    lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
+                       struct netdev_adjacent, list);
+
+    return lower ? lower->dev : NULL;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
+/**
  * netdev_lower_get_first_private_rcu - Get the first ->private from the
  *                    lower neighbour list, RCU
  *                    variant
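netdev_all_lower_get_next() above is a cursor-style iterator: the caller owns the position pointer, and the helper both dereferences and advances it, returning NULL at the end of the list. A minimal standalone version of the same shape, using a plain singly linked list instead of netdev_adjacent (names invented):

```c
#include <stddef.h>
#include <stdio.h>

struct adj { const char *name; struct adj *next; };

/* Return the element the caller's cursor points at and advance it,
 * or NULL once the list is exhausted. */
static struct adj *all_lower_get_next(struct adj **iter)
{
    struct adj *lower = *iter;

    if (!lower)
        return NULL;
    *iter = lower->next;
    return lower;
}

int main(void)
{
    struct adj c = { "eth2", NULL }, b = { "eth1", &c }, a = { "eth0", &b };
    struct adj *iter = &a, *lower;

    while ((lower = all_lower_get_next(&iter)))
        printf("lower dev: %s\n", lower->name);
    return 0;
}
```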
@@ -5472,6 +5589,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
 
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
                     struct net_device *adj_dev,
+                    u16 ref_nr,
                     struct list_head *dev_list,
                     void *private, bool master)
 {
@@ -5481,7 +5599,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
     adj = __netdev_find_adj(adj_dev, dev_list);
 
     if (adj) {
-        adj->ref_nr++;
+        adj->ref_nr += ref_nr;
         return 0;
     }
 
@@ -5491,7 +5609,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 
     adj->dev = adj_dev;
     adj->master = master;
-    adj->ref_nr = 1;
+    adj->ref_nr = ref_nr;
     adj->private = private;
     dev_hold(adj_dev);
 
@@ -5530,6 +5648,7 @@ free_adj:
 
 static void __netdev_adjacent_dev_remove(struct net_device *dev,
                      struct net_device *adj_dev,
+                     u16 ref_nr,
                      struct list_head *dev_list)
 {
     struct netdev_adjacent *adj;
@@ -5542,10 +5661,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
         BUG();
     }
 
-    if (adj->ref_nr > 1) {
-        pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
-             adj->ref_nr-1);
-        adj->ref_nr--;
+    if (adj->ref_nr > ref_nr) {
+        pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name,
+             ref_nr, adj->ref_nr-ref_nr);
+        adj->ref_nr -= ref_nr;
         return;
     }
 
@@ -5564,21 +5683,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
 
 static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
                         struct net_device *upper_dev,
+                        u16 ref_nr,
                         struct list_head *up_list,
                         struct list_head *down_list,
                         void *private, bool master)
 {
     int ret;
 
-    ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
-                       master);
+    ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list,
+                       private, master);
     if (ret)
         return ret;
 
-    ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
-                       false);
+    ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list,
+                       private, false);
     if (ret) {
-        __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+        __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
         return ret;
     }
 
@@ -5586,9 +5706,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
 }
 
 static int __netdev_adjacent_dev_link(struct net_device *dev,
-                      struct net_device *upper_dev)
+                      struct net_device *upper_dev,
+                      u16 ref_nr)
 {
-    return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+    return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr,
                         &dev->all_adj_list.upper,
                         &upper_dev->all_adj_list.lower,
                         NULL, false);
@@ -5596,17 +5717,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev,
 
 static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
                            struct net_device *upper_dev,
+                           u16 ref_nr,
                            struct list_head *up_list,
                            struct list_head *down_list)
 {
-    __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
-    __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
+    __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
+    __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
 }
 
 static void __netdev_adjacent_dev_unlink(struct net_device *dev,
-                     struct net_device *upper_dev)
+                     struct net_device *upper_dev,
+                     u16 ref_nr)
 {
-    __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+    __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
                        &dev->all_adj_list.upper,
                        &upper_dev->all_adj_list.lower);
 }
@@ -5615,17 +5738,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
                         struct net_device *upper_dev,
                         void *private, bool master)
 {
-    int ret = __netdev_adjacent_dev_link(dev, upper_dev);
+    int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1);
 
     if (ret)
         return ret;
 
-    ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
+    ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1,
                        &dev->adj_list.upper,
                        &upper_dev->adj_list.lower,
                        private, master);
     if (ret) {
-        __netdev_adjacent_dev_unlink(dev, upper_dev);
+        __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
         return ret;
     }
 
@@ -5635,8 +5758,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
 static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
                            struct net_device *upper_dev)
 {
-    __netdev_adjacent_dev_unlink(dev, upper_dev);
-    __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+    __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
+    __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
                        &dev->adj_list.upper,
                        &upper_dev->adj_list.lower);
 }
@@ -5689,7 +5812,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
         list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
             pr_debug("Interlinking %s with %s, non-neighbour\n",
                  i->dev->name, j->dev->name);
-            ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+            ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
             if (ret)
                 goto rollback_mesh;
         }
@@ -5699,7 +5822,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
     list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
         pr_debug("linking %s's upper device %s with %s\n",
              upper_dev->name, i->dev->name, dev->name);
-        ret = __netdev_adjacent_dev_link(dev, i->dev);
+        ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
         if (ret)
             goto rollback_upper_mesh;
     }
@@ -5708,7 +5831,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
     list_for_each_entry(i, &dev->all_adj_list.lower, list) {
         pr_debug("linking %s's lower device %s with %s\n", dev->name,
              i->dev->name, upper_dev->name);
-        ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+        ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
         if (ret)
             goto rollback_lower_mesh;
     }
@@ -5726,7 +5849,7 @@ rollback_lower_mesh:
     list_for_each_entry(i, &dev->all_adj_list.lower, list) {
         if (i == to_i)
             break;
-        __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+        __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
     }
 
     i = NULL;
@@ -5736,7 +5859,7 @@ rollback_upper_mesh:
     list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
         if (i == to_i)
             break;
-        __netdev_adjacent_dev_unlink(dev, i->dev);
+        __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
     }
 
     i = j = NULL;
@@ -5748,7 +5871,7 @@ rollback_mesh:
         list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
             if (i == to_i && j == to_j)
                 break;
-            __netdev_adjacent_dev_unlink(i->dev, j->dev);
+            __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
         }
         if (i == to_i)
             break;
@@ -5828,16 +5951,16 @@ void netdev_upper_dev_unlink(struct net_device *dev,
      */
     list_for_each_entry(i, &dev->all_adj_list.lower, list)
         list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
-            __netdev_adjacent_dev_unlink(i->dev, j->dev);
+            __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
 
     /* remove also the devices itself from lower/upper device
      * list
      */
     list_for_each_entry(i, &dev->all_adj_list.lower, list)
-        __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+        __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
 
     list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
-        __netdev_adjacent_dev_unlink(dev, i->dev);
+        __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
 
     call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
                       &changeupper_info.info);
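The ref_nr plumbing above exists because linked device meshes can reach the same adjacency through several paths; insert and remove therefore take an explicit reference count rather than always one, so the rollback and unlink paths release exactly as many references as were taken. A tiny model of that weighted refcount (illustrative types only):

```c
#include <stdio.h>

struct adj { int ref_nr; };

static void adj_insert(struct adj *a, unsigned short ref_nr)
{
    a->ref_nr += ref_nr;               /* was: a->ref_nr++ */
}

static int adj_remove(struct adj *a, unsigned short ref_nr)
{
    if (a->ref_nr > ref_nr) {
        a->ref_nr -= ref_nr;           /* still referenced via other paths */
        return 0;
    }
    a->ref_nr = 0;
    return 1;                          /* last reference: free the adjacency */
}

int main(void)
{
    struct adj a = { 0 };

    adj_insert(&a, 3);                 /* three paths reach this peer */
    printf("freed after -1: %d\n", adj_remove(&a, 1));  /* 0: two left */
    printf("freed after -2: %d\n", adj_remove(&a, 2));  /* 1: gone */
    return 0;
}
```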
@@ -5871,7 +5994,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
     struct net *net = dev_net(dev);
 
     list_for_each_entry(iter, &dev->adj_list.upper, list) {
-        if (!net_eq(net,dev_net(iter->dev)))
+        if (!net_eq(net, dev_net(iter->dev)))
             continue;
         netdev_adjacent_sysfs_add(iter->dev, dev,
                       &iter->dev->adj_list.lower);
@@ -5880,7 +6003,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
     }
 
     list_for_each_entry(iter, &dev->adj_list.lower, list) {
-        if (!net_eq(net,dev_net(iter->dev)))
+        if (!net_eq(net, dev_net(iter->dev)))
             continue;
         netdev_adjacent_sysfs_add(iter->dev, dev,
                       &iter->dev->adj_list.upper);
@@ -5896,7 +6019,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
     struct net *net = dev_net(dev);
 
     list_for_each_entry(iter, &dev->adj_list.upper, list) {
-        if (!net_eq(net,dev_net(iter->dev)))
+        if (!net_eq(net, dev_net(iter->dev)))
             continue;
         netdev_adjacent_sysfs_del(iter->dev, dev->name,
                       &iter->dev->adj_list.lower);
@@ -5905,7 +6028,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
     }
 
     list_for_each_entry(iter, &dev->adj_list.lower, list) {
-        if (!net_eq(net,dev_net(iter->dev)))
+        if (!net_eq(net, dev_net(iter->dev)))
             continue;
         netdev_adjacent_sysfs_del(iter->dev, dev->name,
                       &iter->dev->adj_list.upper);
@@ -5921,7 +6044,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
     struct net *net = dev_net(dev);
 
     list_for_each_entry(iter, &dev->adj_list.upper, list) {
-        if (!net_eq(net,dev_net(iter->dev)))
+        if (!net_eq(net, dev_net(iter->dev)))
             continue;
         netdev_adjacent_sysfs_del(iter->dev, oldname,
                       &iter->dev->adj_list.lower);
@@ -5930,7 +6053,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
     }
 
     list_for_each_entry(iter, &dev->adj_list.lower, list) {
-        if (!net_eq(net,dev_net(iter->dev)))
+        if (!net_eq(net, dev_net(iter->dev)))
             continue;
         netdev_adjacent_sysfs_del(iter->dev, oldname,
                       &iter->dev->adj_list.upper);
@@ -5955,8 +6078,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
 
 EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
-int dev_get_nest_level(struct net_device *dev,
-               bool (*type_check)(const struct net_device *dev))
+int dev_get_nest_level(struct net_device *dev)
 {
     struct net_device *lower = NULL;
     struct list_head *iter;
@@ -5966,15 +6088,12 @@ int dev_get_nest_level(struct net_device *dev,
     ASSERT_RTNL();
 
     netdev_for_each_lower_dev(dev, lower, iter) {
-        nest = dev_get_nest_level(lower, type_check);
+        nest = dev_get_nest_level(lower);
         if (max_nest < nest)
             max_nest = nest;
     }
 
-    if (type_check(dev))
-        max_nest++;
-
-    return max_nest;
+    return max_nest + 1;
 }
 EXPORT_SYMBOL(dev_get_nest_level);
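The simplified dev_get_nest_level() above no longer takes a type_check callback: a device's nesting depth is simply one more than the deepest of its lower devices. A standalone sketch of that recursion, using fixed-size arrays instead of adjacency lists (names invented):

```c
#include <stdio.h>

struct dev { struct dev *lower[4]; int n_lower; };

static int nest_level(const struct dev *d)
{
    int max_nest = 0;

    for (int i = 0; i < d->n_lower; i++) {
        int nest = nest_level(d->lower[i]);

        if (nest > max_nest)
            max_nest = nest;
    }
    return max_nest + 1;               /* count this device itself */
}

int main(void)
{
    struct dev eth0    = { .n_lower = 0 };
    struct dev vlan    = { .lower = { &eth0 }, .n_lower = 1 };
    struct dev macvlan = { .lower = { &vlan }, .n_lower = 1 };

    printf("nest level: %d\n", nest_level(&macvlan));   /* prints 3 */
    return 0;
}
```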
@@ -5998,6 +6117,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
 }
 EXPORT_SYMBOL(netdev_lower_state_changed);
 
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+                       struct neighbour *n)
+{
+    struct net_device *lower_dev, *stop_dev;
+    struct list_head *iter;
+    int err;
+
+    netdev_for_each_lower_dev(dev, lower_dev, iter) {
+        if (!lower_dev->netdev_ops->ndo_neigh_construct)
+            continue;
+        err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
+        if (err) {
+            stop_dev = lower_dev;
+            goto rollback;
+        }
+    }
+    return 0;
+
+rollback:
+    netdev_for_each_lower_dev(dev, lower_dev, iter) {
+        if (lower_dev == stop_dev)
+            break;
+        if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+            continue;
+        lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+    }
+    return err;
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
+
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+                      struct neighbour *n)
+{
+    struct net_device *lower_dev;
+    struct list_head *iter;
+
+    netdev_for_each_lower_dev(dev, lower_dev, iter) {
+        if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+            continue;
+        lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+    }
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
     const struct net_device_ops *ops = dev->netdev_ops;
@@ -6482,6 +6645,38 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 EXPORT_SYMBOL(dev_change_proto_down);
 
 /**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @fd: new program fd or negative value to clear
+ *
+ * Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, int fd)
+{
+    const struct net_device_ops *ops = dev->netdev_ops;
+    struct bpf_prog *prog = NULL;
+    struct netdev_xdp xdp = {};
+    int err;
+
+    if (!ops->ndo_xdp)
+        return -EOPNOTSUPP;
+    if (fd >= 0) {
+        prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+        if (IS_ERR(prog))
+            return PTR_ERR(prog);
+    }
+
+    xdp.command = XDP_SETUP_PROG;
+    xdp.prog = prog;
+
+    err = ops->ndo_xdp(dev, &xdp);
+    if (err < 0 && prog)
+        bpf_prog_put(prog);
+
+    return err;
+}
+EXPORT_SYMBOL(dev_change_xdp_fd);
+
+/**
  * dev_new_index - allocate an ifindex
  * @net: the applicable net namespace
 *
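dev_change_xdp_fd() above follows a strict ownership rule: the reference obtained from the fd is offered to the driver's setup hook, and the caller releases it only if the hook fails (on success the driver keeps it). A standalone model of that hand-off; prog_get()/prog_put() and the driver hook here are invented stand-ins, not bpf APIs:

```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct prog { int refcnt; };

static struct prog *prog_get(int fd)          /* bpf_prog_get_type() stand-in */
{
    struct prog *p;

    if (fd < 0)
        return NULL;
    p = calloc(1, sizeof(*p));
    p->refcnt = 1;
    return p;
}

static void prog_put(struct prog *p)          /* bpf_prog_put() stand-in */
{
    if (p && --p->refcnt == 0)
        free(p);
}

static int driver_setup_prog(struct prog *p)  /* ndo_xdp stand-in */
{
    (void)p;
    return -EOPNOTSUPP;   /* pretend the driver rejects the program */
}

int main(void)
{
    struct prog *prog = prog_get(3);          /* fd >= 0: attach request */
    int err = driver_setup_prog(prog);

    if (err < 0 && prog)
        prog_put(prog);                       /* driver refused: drop our ref */
    printf("attach: %d\n", err);
    return 0;
}
```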
@@ -6545,8 +6740,8 @@ static void rollback_registered_many(struct list_head *head)
         unlist_netdevice(dev);
 
         dev->reg_state = NETREG_UNREGISTERING;
-        on_each_cpu(flush_backlog, dev, 1);
     }
+    flush_all_backlogs();
 
     synchronize_net();
 
@@ -6676,6 +6871,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
         features &= ~NETIF_F_TSO6;
     }
 
+    /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */
+    if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
+        features &= ~NETIF_F_TSO_MANGLEID;
+
     /* TSO ECN requires that TSO is present as well. */
     if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
         features &= ~NETIF_F_TSO_ECN;
@@ -6704,6 +6903,14 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
         }
     }
 
+    /* GSO partial features require GSO partial be set */
+    if ((features & dev->gso_partial_features) &&
+        !(features & NETIF_F_GSO_PARTIAL)) {
+        netdev_dbg(dev,
+               "Dropping partially supported GSO features since no GSO partial.\n");
+        features &= ~dev->gso_partial_features;
+    }
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
     if (dev->netdev_ops->ndo_busy_poll)
         features |= NETIF_F_BUSY_POLL;
@@ -6974,9 +7181,22 @@ int register_netdevice(struct net_device *dev)
     dev->features |= NETIF_F_SOFT_FEATURES;
     dev->wanted_features = dev->features & dev->hw_features;
 
-    if (!(dev->flags & IFF_LOOPBACK)) {
+    if (!(dev->flags & IFF_LOOPBACK))
         dev->hw_features |= NETIF_F_NOCACHE_COPY;
-    }
+
+    /* If IPv4 TCP segmentation offload is supported we should also
+     * allow the device to enable segmenting the frame with the option
+     * of ignoring a static IP ID value.  This doesn't enable the
+     * feature itself but allows the user to enable it later.
+     */
+    if (dev->hw_features & NETIF_F_TSO)
+        dev->hw_features |= NETIF_F_TSO_MANGLEID;
+    if (dev->vlan_features & NETIF_F_TSO)
+        dev->vlan_features |= NETIF_F_TSO_MANGLEID;
+    if (dev->mpls_features & NETIF_F_TSO)
+        dev->mpls_features |= NETIF_F_TSO_MANGLEID;
+    if (dev->hw_enc_features & NETIF_F_TSO)
+        dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
 
     /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
      */
@@ -6984,7 +7204,7 @@ int register_netdevice(struct net_device *dev)
 
     /* Make NETIF_F_SG inheritable to tunnel devices.
      */
-    dev->hw_enc_features |= NETIF_F_SG;
+    dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
 
     /* Make NETIF_F_SG inheritable to MPLS.
      */
@@ -7427,7 +7647,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
     dev->gso_max_size = GSO_MAX_SIZE;
     dev->gso_max_segs = GSO_MAX_SEGS;
-    dev->gso_min_segs = 0;
 
     INIT_LIST_HEAD(&dev->napi_list);
     INIT_LIST_HEAD(&dev->unreg_list);
@@ -7439,6 +7658,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
     INIT_LIST_HEAD(&dev->all_adj_list.lower);
     INIT_LIST_HEAD(&dev->ptype_all);
     INIT_LIST_HEAD(&dev->ptype_specific);
+#ifdef CONFIG_NET_SCHED
+    hash_init(dev->qdisc_hash);
+#endif
     dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
     setup(dev);
 
@@ -8084,8 +8306,11 @@ static int __init net_dev_init(void)
      */
 
     for_each_possible_cpu(i) {
+        struct work_struct *flush = per_cpu_ptr(&flush_works, i);
         struct softnet_data *sd = &per_cpu(softnet_data, i);
 
+        INIT_WORK(flush, flush_backlog);
+
         skb_queue_head_init(&sd->input_pkt_queue);
         skb_queue_head_init(&sd->process_queue);
         INIT_LIST_HEAD(&sd->poll_list);
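Both netdev_fix_features() additions above enforce the same kind of dependency: a child feature bit is stripped whenever its parent bit is off (TSO_MANGLEID needs TSO; a device's gso_partial_features only apply while GSO_PARTIAL is set). A minimal model with made-up feature bits:

```c
#include <stdint.h>
#include <stdio.h>

#define F_TSO          (1ULL << 0)   /* illustrative feature bits */
#define F_TSO_MANGLEID (1ULL << 1)
#define F_GSO_PARTIAL  (1ULL << 2)

static uint64_t fix_features(uint64_t features, uint64_t gso_partial_features)
{
    /* ID mangling is meaningless without TSO itself */
    if ((features & F_TSO_MANGLEID) && !(features & F_TSO))
        features &= ~F_TSO_MANGLEID;

    /* partial-GSO features require the GSO_PARTIAL parent bit */
    if ((features & gso_partial_features) && !(features & F_GSO_PARTIAL))
        features &= ~gso_partial_features;

    return features;
}

int main(void)
{
    uint64_t f = fix_features(F_TSO_MANGLEID, 0);   /* TSO itself is off */

    printf("MANGLEID kept: %s\n", (f & F_TSO_MANGLEID) ? "yes" : "no");
    return 0;
}
```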