Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 595
1 file changed, 525 insertions, 70 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index f64e439b4a00..862eaa744a54 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1857,6 +1857,228 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+#ifdef CONFIG_XPS
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
+					int cpu, u16 index)
+{
+	struct xps_map *map = NULL;
+	int pos;
+
+	if (dev_maps)
+		map = xmap_dereference(dev_maps->cpu_map[cpu]);
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] == index) {
+			if (map->len > 1) {
+				map->queues[pos] = map->queues[--map->len];
+			} else {
+				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
+				kfree_rcu(map, rcu);
+				map = NULL;
+			}
+			break;
+		}
+	}
+
+	return map;
+}
+
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+{
+	struct xps_dev_maps *dev_maps;
+	int cpu, i;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for_each_possible_cpu(cpu) {
+		for (i = index; i < dev->num_tx_queues; i++) {
+			if (!remove_xps_queue(dev_maps, cpu, i))
+				break;
+		}
+		if (i == dev->num_tx_queues)
+			active = true;
+	}
+
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	for (i = index; i < dev->num_tx_queues; i++)
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+					     NUMA_NO_NODE);
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+}
+
+static struct xps_map *expand_xps_map(struct xps_map *map,
+				      int cpu, u16 index)
+{
+	struct xps_map *new_map;
+	int alloc_len = XPS_MIN_MAP_ALLOC;
+	int i, pos;
+
+	for (pos = 0; map && pos < map->len; pos++) {
+		if (map->queues[pos] != index)
+			continue;
+		return map;
+	}
+
+	/* Need to add queue to this CPU's existing map */
+	if (map) {
+		if (pos < map->alloc_len)
+			return map;
+
+		alloc_len = map->alloc_len * 2;
+	}
+
+	/* Need to allocate new map to store queue on this CPU's map */
+	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+			       cpu_to_node(cpu));
+	if (!new_map)
+		return NULL;
+
+	for (i = 0; i < pos; i++)
+		new_map->queues[i] = map->queues[i];
+	new_map->alloc_len = alloc_len;
+	new_map->len = pos;
+
+	return new_map;
+}
+
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+{
+	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	struct xps_map *map, *new_map;
+	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
+	int cpu, numa_node_id = -2;
+	bool active = false;
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	/* allocate memory for queue storage */
+	for_each_online_cpu(cpu) {
+		if (!cpumask_test_cpu(cpu, mask))
+			continue;
+
+		if (!new_dev_maps)
+			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+		if (!new_dev_maps)
+			return -ENOMEM;
+
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+
+		map = expand_xps_map(map, cpu, index);
+		if (!map)
+			goto error;
+
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+	}
+
+	if (!new_dev_maps)
+		goto out_no_new_maps;
+
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
+			/* add queue to CPU maps */
+			int pos = 0;
+
+			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			while ((pos < map->len) && (map->queues[pos] != index))
+				pos++;
+
+			if (pos == map->len)
+				map->queues[map->len++] = index;
+#ifdef CONFIG_NUMA
+			if (numa_node_id == -2)
+				numa_node_id = cpu_to_node(cpu);
+			else if (numa_node_id != cpu_to_node(cpu))
+				numa_node_id = -1;
+#endif
+		} else if (dev_maps) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+		}
+
+	}
+
+	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+
+	/* Cleanup old maps */
+	if (dev_maps) {
+		for_each_possible_cpu(cpu) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+			if (map && map != new_map)
+				kfree_rcu(map, rcu);
+		}
+
+		kfree_rcu(dev_maps, rcu);
+	}
+
+	dev_maps = new_dev_maps;
+	active = true;
+
+out_no_new_maps:
+	/* update Tx queue numa node */
+	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+				     (numa_node_id >= 0) ? numa_node_id :
+				     NUMA_NO_NODE);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	/* removes queue from unused CPUs */
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
+			continue;
+
+		if (remove_xps_queue(dev_maps, cpu, index))
+			active = true;
+	}
+
+	/* free map if not active */
+	if (!active) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+
+	return 0;
+error:
+	/* remove any maps that we added */
+	for_each_possible_cpu(cpu) {
+		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+				 NULL;
+		if (new_map && new_map != map)
+			kfree(new_map);
+	}
+
+	mutex_unlock(&xps_map_mutex);
+
+	kfree(new_dev_maps);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(netif_set_xps_queue);
+
+#endif
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
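The new netif_set_xps_queue() helper gives drivers a programmatic way to publish the same CPU-to-transmit-queue mapping that was previously reachable only through the per-queue xps_cpus sysfs attribute. A minimal sketch of how a multiqueue driver might use it; the foo_ name is hypothetical and not part of this patch:

#include <linux/netdevice.h>
#include <linux/cpumask.h>
#include <linux/slab.h>

/* Hypothetical driver setup: dedicate TX queue i to CPU i so that
 * get_xps_queue() prefers the local queue for locally generated traffic. */
static int foo_setup_xps(struct net_device *dev)
{
	cpumask_var_t mask;
	int cpu, err = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		if (cpu >= dev->real_num_tx_queues)
			break;
		cpumask_clear(mask);
		cpumask_set_cpu(cpu, mask);
		err = netif_set_xps_queue(dev, mask, cpu);
		if (err)
			break;
	}

	free_cpumask_var(mask);
	return err;
}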
@@ -1880,8 +2102,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
-		if (txq < dev->real_num_tx_queues)
+		if (txq < dev->real_num_tx_queues) {
 			qdisc_reset_all_tx_gt(dev, txq);
+#ifdef CONFIG_XPS
+			netif_reset_xps_queues_gt(dev, txq);
+#endif
+		}
 	}
 
 	dev->real_num_tx_queues = txq;
@@ -2495,43 +2721,71 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
+u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 {
-	int queue_index;
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	if (dev->real_num_tx_queues == 1)
-		queue_index = 0;
-	else if (ops->ndo_select_queue) {
-		queue_index = ops->ndo_select_queue(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
-	} else {
-		struct sock *sk = skb->sk;
-		queue_index = sk_tx_queue_get(sk);
-
-		if (queue_index < 0 || skb->ooo_okay ||
-		    queue_index >= dev->real_num_tx_queues) {
-			int old_index = queue_index;
+	struct sock *sk = skb->sk;
+	int queue_index = sk_tx_queue_get(sk);
 
-			queue_index = get_xps_queue(dev, skb);
-			if (queue_index < 0)
-				queue_index = skb_tx_hash(dev, skb);
+	if (queue_index < 0 || skb->ooo_okay ||
+	    queue_index >= dev->real_num_tx_queues) {
+		int new_index = get_xps_queue(dev, skb);
+		if (new_index < 0)
+			new_index = skb_tx_hash(dev, skb);
 
-			if (queue_index != old_index && sk) {
-				struct dst_entry *dst =
+		if (queue_index != new_index && sk) {
+			struct dst_entry *dst =
 				    rcu_dereference_check(sk->sk_dst_cache, 1);
 
-				if (dst && skb_dst(skb) == dst)
-					sk_tx_queue_set(sk, queue_index);
-			}
+			if (dst && skb_dst(skb) == dst)
+				sk_tx_queue_set(sk, queue_index);
+		}
+
+		queue_index = new_index;
+	}
+
+	return queue_index;
+}
+EXPORT_SYMBOL(__netdev_pick_tx);
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb)
+{
+	int queue_index = 0;
+
+	if (dev->real_num_tx_queues != 1) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+		if (ops->ndo_select_queue)
+			queue_index = ops->ndo_select_queue(dev, skb);
+		else
+			queue_index = __netdev_pick_tx(dev, skb);
+		queue_index = dev_cap_txqueue(dev, queue_index);
 	}
 
 	skb_set_queue_mapping(skb, queue_index);
 	return netdev_get_tx_queue(dev, queue_index);
 }
 
+static void qdisc_pkt_len_init(struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+
+	/* To get more precise estimation of bytes sent on wire,
+	 * we add to pkt_len the headers size of all segments
+	 */
+	if (shinfo->gso_size) {
+		unsigned int hdr_len = skb_transport_offset(skb);
+
+		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+			hdr_len += tcp_hdrlen(skb);
+		else
+			hdr_len += sizeof(struct udphdr);
+		qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
+	}
+}
+
 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct net_device *dev,
 				 struct netdev_queue *txq)
@@ -2540,7 +2794,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	bool contended;
 	int rc;
 
-	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	qdisc_pkt_len_init(skb);
 	qdisc_calculate_pkt_len(skb, q);
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
@@ -3352,7 +3606,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
+	if (!skb_transport_header_was_set(skb))
+		skb_reset_transport_header(skb);
 	skb_reset_mac_len(skb);
 
 	pt_prev = NULL;
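Splitting __netdev_pick_tx() out of netdev_pick_tx() and exporting it lets a driver's ndo_select_queue() handle only its special cases and fall back to the core policy (socket queue hint, XPS, flow hash) for everything else. A hedged sketch with a hypothetical foo_ driver and an illustrative ethertype:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

#define FOO_MGMT_PROTO	0x88b5	/* illustrative, locally administered ethertype */

static u16 foo_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	/* Pin management frames to the last queue... */
	if (skb->protocol == htons(FOO_MGMT_PROTO))
		return dev->real_num_tx_queues - 1;

	/* ...and let the core decide for ordinary traffic. */
	return __netdev_pick_tx(dev, skb);
}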
@@ -4600,64 +4855,231 @@ static int __init dev_proc_init(void)
 #endif /* CONFIG_PROC_FS */
 
+struct netdev_upper {
+	struct net_device *dev;
+	bool master;
+	struct list_head list;
+	struct rcu_head rcu;
+	struct list_head search_list;
+};
+
+static void __append_search_uppers(struct list_head *search_list,
+				   struct net_device *dev)
+{
+	struct netdev_upper *upper;
+
+	list_for_each_entry(upper, &dev->upper_dev_list, list) {
+		/* check if this upper is not already in search list */
+		if (list_empty(&upper->search_list))
+			list_add_tail(&upper->search_list, search_list);
+	}
+}
+
+static bool __netdev_search_upper_dev(struct net_device *dev,
+				      struct net_device *upper_dev)
+{
+	LIST_HEAD(search_list);
+	struct netdev_upper *upper;
+	struct netdev_upper *tmp;
+	bool ret = false;
+
+	__append_search_uppers(&search_list, dev);
+	list_for_each_entry(upper, &search_list, search_list) {
+		if (upper->dev == upper_dev) {
+			ret = true;
+			break;
+		}
+		__append_search_uppers(&search_list, upper->dev);
+	}
+	list_for_each_entry_safe(upper, tmp, &search_list, search_list)
+		INIT_LIST_HEAD(&upper->search_list);
+	return ret;
+}
+
+static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
+						struct net_device *upper_dev)
+{
+	struct netdev_upper *upper;
+
+	list_for_each_entry(upper, &dev->upper_dev_list, list) {
+		if (upper->dev == upper_dev)
+			return upper;
+	}
+	return NULL;
+}
+
 /**
- * netdev_set_master - set up master pointer
- * @slave: slave device
- * @master: new master device
- *
- * Changes the master device of the slave. Pass %NULL to break the
- * bonding. The caller must hold the RTNL semaphore. On a failure
- * a negative errno code is returned. On success the reference counts
- * are adjusted and the function returns zero.
+ * netdev_has_upper_dev - Check if device is linked to an upper device
+ * @dev: device
+ * @upper_dev: upper device to check
+ *
+ * Find out if a device is linked to specified upper device and return true
+ * in case it is. Note that this checks only immediate upper device,
+ * not through a complete stack of devices. The caller must hold the RTNL lock.
  */
-int netdev_set_master(struct net_device *slave, struct net_device *master)
+bool netdev_has_upper_dev(struct net_device *dev,
+			  struct net_device *upper_dev)
 {
-	struct net_device *old = slave->master;
+	ASSERT_RTNL();
+	return __netdev_find_upper(dev, upper_dev);
+}
+EXPORT_SYMBOL(netdev_has_upper_dev);
+
+/**
+ * netdev_has_any_upper_dev - Check if device is linked to some device
+ * @dev: device
+ *
+ * Find out if a device is linked to an upper device and return true in case
+ * it is. The caller must hold the RTNL lock.
+ */
+bool netdev_has_any_upper_dev(struct net_device *dev)
+{
 	ASSERT_RTNL();
 
-	if (master) {
-		if (old)
-			return -EBUSY;
-		dev_hold(master);
-	}
+	return !list_empty(&dev->upper_dev_list);
+}
+EXPORT_SYMBOL(netdev_has_any_upper_dev);
+
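With the adjacency list in place, stacked-device drivers can query relationships through these helpers instead of peeking at dev->master. A hypothetical pre-enslave check under RTNL; the foo_ name is illustrative only:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int foo_enslave_allowed(struct net_device *port, struct net_device *master)
{
	ASSERT_RTNL();

	/* Port already sits under some other device (bridge, bond, team...)? */
	if (netdev_has_any_upper_dev(port))
		return -EBUSY;

	/* Already linked to this particular upper device? */
	if (netdev_has_upper_dev(port, master))
		return -EEXIST;

	return 0;
}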
+/**
+ * netdev_master_upper_dev_get - Get master upper device
+ * @dev: device
+ *
+ * Find a master upper device and return pointer to it or NULL in case
+ * it's not there. The caller must hold the RTNL lock.
+ */
+struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
+{
+	struct netdev_upper *upper;
 
-	slave->master = master;
+	ASSERT_RTNL();
 
-	if (old)
-		dev_put(old);
-	return 0;
+	if (list_empty(&dev->upper_dev_list))
+		return NULL;
+
+	upper = list_first_entry(&dev->upper_dev_list,
+				 struct netdev_upper, list);
+	if (likely(upper->master))
+		return upper->dev;
+	return NULL;
 }
-EXPORT_SYMBOL(netdev_set_master);
+EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
 /**
- * netdev_set_bond_master - set up bonding master/slave pair
- * @slave: slave device
- * @master: new master device
- *
- * Changes the master device of the slave. Pass %NULL to break the
- * bonding. The caller must hold the RTNL semaphore. On a failure
- * a negative errno code is returned. On success %RTM_NEWLINK is sent
- * to the routing socket and the function returns zero.
+ * netdev_master_upper_dev_get_rcu - Get master upper device
+ * @dev: device
+ *
+ * Find a master upper device and return pointer to it or NULL in case
+ * it's not there. The caller must hold the RCU read lock.
  */
-int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
+struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
 {
-	int err;
+	struct netdev_upper *upper;
+
+	upper = list_first_or_null_rcu(&dev->upper_dev_list,
+				       struct netdev_upper, list);
+	if (upper && likely(upper->master))
+		return upper->dev;
+	return NULL;
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
+
+static int __netdev_upper_dev_link(struct net_device *dev,
+				   struct net_device *upper_dev, bool master)
+{
+	struct netdev_upper *upper;
 
 	ASSERT_RTNL();
 
-	err = netdev_set_master(slave, master);
-	if (err)
-		return err;
+	if (dev == upper_dev)
+		return -EBUSY;
+
+	/* To prevent loops, check if dev is not upper device to upper_dev. */
+	if (__netdev_search_upper_dev(upper_dev, dev))
+		return -EBUSY;
+
+	if (__netdev_find_upper(dev, upper_dev))
+		return -EEXIST;
+
+	if (master && netdev_master_upper_dev_get(dev))
+		return -EBUSY;
+
+	upper = kmalloc(sizeof(*upper), GFP_KERNEL);
+	if (!upper)
+		return -ENOMEM;
+
+	upper->dev = upper_dev;
+	upper->master = master;
+	INIT_LIST_HEAD(&upper->search_list);
+
+	/* Ensure that master upper link is always the first item in list. */
 	if (master)
-		slave->flags |= IFF_SLAVE;
+		list_add_rcu(&upper->list, &dev->upper_dev_list);
 	else
-		slave->flags &= ~IFF_SLAVE;
+		list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
+	dev_hold(upper_dev);
 
-	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
 	return 0;
 }
-EXPORT_SYMBOL(netdev_set_bond_master);
+
+/**
+ * netdev_upper_dev_link - Add a link to the upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Adds a link to device which is upper to this one. The caller must hold
+ * the RTNL lock. On a failure a negative errno code is returned.
+ * On success the reference counts are adjusted and the function
+ * returns zero.
+ */
+int netdev_upper_dev_link(struct net_device *dev,
+			  struct net_device *upper_dev)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, false);
+}
+EXPORT_SYMBOL(netdev_upper_dev_link);
+
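A sketch of how a vlan/macvlan-style virtual device might record its lower/upper relationship with the new link helpers; both calls run under RTNL, the foo_ names are hypothetical, and the detach path uses netdev_upper_dev_unlink() introduced just below:

#include <linux/netdevice.h>

/* Hypothetical stacked device: vdev transmits through lower. */
static int foo_vdev_attach(struct net_device *lower, struct net_device *vdev)
{
	/* Non-master link: a lower device may carry many such uppers. */
	return netdev_upper_dev_link(lower, vdev);
}

static void foo_vdev_detach(struct net_device *lower, struct net_device *vdev)
{
	netdev_upper_dev_unlink(lower, vdev);
}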
+/**
+ * netdev_master_upper_dev_link - Add a master link to the upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Adds a link to device which is upper to this one. In this case, only
+ * one master upper device can be linked, although other non-master devices
+ * might be linked as well. The caller must hold the RTNL lock.
+ * On a failure a negative errno code is returned. On success the reference
+ * counts are adjusted and the function returns zero.
+ */
+int netdev_master_upper_dev_link(struct net_device *dev,
+				 struct net_device *upper_dev)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, true);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link);
+
+/**
+ * netdev_upper_dev_unlink - Removes a link to upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Removes a link to device which is upper to this one. The caller must hold
+ * the RTNL lock.
+ */
+void netdev_upper_dev_unlink(struct net_device *dev,
+			     struct net_device *upper_dev)
+{
+	struct netdev_upper *upper;
+
+	ASSERT_RTNL();
+
+	upper = __netdev_find_upper(dev, upper_dev);
+	if (!upper)
+		return;
+	list_del_rcu(&upper->list);
+	dev_put(upper_dev);
+	kfree_rcu(upper, rcu);
+}
+EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
@@ -5020,13 +5442,34 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 	err = ops->ndo_set_mac_address(dev, sa);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	if (err)
+		return err;
+	dev->addr_assign_type = NET_ADDR_SET;
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL(dev_set_mac_address);
 
+/**
+ * dev_change_carrier - Change device carrier
+ * @dev: device
+ * @new_carries: new value
+ *
+ * Change device carrier
+ */
+int dev_change_carrier(struct net_device *dev, bool new_carrier)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (!ops->ndo_change_carrier)
+		return -EOPNOTSUPP;
+	if (!netif_device_present(dev))
+		return -ENODEV;
+	return ops->ndo_change_carrier(dev, new_carrier);
+}
+EXPORT_SYMBOL(dev_change_carrier);
+
 /*
  * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
  */
@@ -5482,11 +5925,15 @@ static void rollback_registered_many(struct list_head *head)
 		if (dev->netdev_ops->ndo_uninit)
 			dev->netdev_ops->ndo_uninit(dev);
 
-		/* Notifier chain MUST detach us from master device. */
-		WARN_ON(dev->master);
+		/* Notifier chain MUST detach us all upper devices. */
+		WARN_ON(netdev_has_any_upper_dev(dev));
 
 		/* Remove entries from kobject tree */
 		netdev_unregister_kobject(dev);
+#ifdef CONFIG_XPS
+		/* Remove XPS queueing entries */
+		netif_reset_xps_queues_gt(dev, 0);
+#endif
 	}
 
 	synchronize_net();
@@ -5815,6 +6262,13 @@ int register_netdevice(struct net_device *dev)
 	list_netdevice(dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
 
+	/* If the device has permanent device address, driver should
+	 * set dev_addr and also addr_assign_type should be set to
+	 * NET_ADDR_PERM (default value).
+	 */
+	if (dev->addr_assign_type == NET_ADDR_PERM)
+		memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
+
 	/* Notify protocols, that a new device appeared. */
 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
 	ret = notifier_to_errno(ret);
@@ -6199,6 +6653,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
+	INIT_LIST_HEAD(&dev->upper_dev_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
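dev_change_carrier() only forwards the request to the driver, so a purely software device can expose carrier control by implementing ndo_change_carrier. A minimal hypothetical example (this mirrors what a dummy-style driver could do, not code from this patch):

#include <linux/netdevice.h>

static int foo_change_carrier(struct net_device *dev, bool new_carrier)
{
	if (new_carrier)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);
	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_change_carrier	= foo_change_carrier,
	/* ... remaining ops elided ... */
};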