summaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c514
1 files changed, 387 insertions, 127 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 343883f65ea7..ed4550fd9ece 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -126,6 +126,7 @@
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
+#include <trace/napi.h>
#include "net-sysfs.h"
@@ -1336,7 +1337,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
+#ifdef CONFIG_NET_CLS_ACT
+ if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
+ net_timestamp(skb);
+#else
net_timestamp(skb);
+#endif
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1683,7 +1689,17 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
goto gso;
}
+ /*
+ * If device doesnt need skb->dst, release it right now while
+ * its hot in this cpu cache
+ */
+ if ((dev->priv_flags & IFF_XMIT_DST_RELEASE) && skb->dst) {
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ }
rc = ops->ndo_start_xmit(skb, dev);
+ if (rc == 0)
+ txq_trans_update(txq);
/*
* TODO: if skb_orphan() was called by
* dev->hard_start_xmit() (for example, the unmodified
@@ -1713,6 +1729,7 @@ gso:
skb->next = nskb;
return rc;
}
+ txq_trans_update(txq);
if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
@@ -1732,9 +1749,14 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
- } else if (skb->sk && skb->sk->sk_hash) {
+ while (unlikely (hash >= dev->real_num_tx_queues))
+ hash -= dev->real_num_tx_queues;
+ return hash;
+ }
+
+ if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
- } else
+ else
hash = skb->protocol;
hash = jhash_1word(hash, skb_tx_hashrnd);
@@ -2368,26 +2390,6 @@ void napi_gro_flush(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_gro_flush);
-void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
-{
- unsigned int offset = skb_gro_offset(skb);
-
- hlen += offset;
- if (hlen <= skb_headlen(skb))
- return skb->data + offset;
-
- if (unlikely(!skb_shinfo(skb)->nr_frags ||
- skb_shinfo(skb)->frags[0].size <=
- hlen - skb_headlen(skb) ||
- PageHighMem(skb_shinfo(skb)->frags[0].page)))
- return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
-
- return page_address(skb_shinfo(skb)->frags[0].page) +
- skb_shinfo(skb)->frags[0].page_offset +
- offset - skb_headlen(skb);
-}
-EXPORT_SYMBOL(skb_gro_header);
-
int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
@@ -2450,10 +2452,25 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
ret = GRO_HELD;
pull:
- if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
- if (napi->gro_list == skb)
- napi->gro_list = skb->next;
- ret = GRO_DROP;
+ if (skb_headlen(skb) < skb_gro_offset(skb)) {
+ int grow = skb_gro_offset(skb) - skb_headlen(skb);
+
+ BUG_ON(skb->end - skb->tail < grow);
+
+ memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+ skb->tail += grow;
+ skb->data_len -= grow;
+
+ skb_shinfo(skb)->frags[0].page_offset += grow;
+ skb_shinfo(skb)->frags[0].size -= grow;
+
+ if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
+ put_page(skb_shinfo(skb)->frags[0].page);
+ memmove(skb_shinfo(skb)->frags,
+ skb_shinfo(skb)->frags + 1,
+ --skb_shinfo(skb)->nr_frags);
+ }
}
ok:
@@ -2503,6 +2520,22 @@ int napi_skb_finish(int ret, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_skb_finish);
+void skb_gro_reset_offset(struct sk_buff *skb)
+{
+ NAPI_GRO_CB(skb)->data_offset = 0;
+ NAPI_GRO_CB(skb)->frag0 = NULL;
+ NAPI_GRO_CB(skb)->frag0_len = 0;
+
+ if (skb->mac_header == skb->tail &&
+ !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
+ NAPI_GRO_CB(skb)->frag0 =
+ page_address(skb_shinfo(skb)->frags[0].page) +
+ skb_shinfo(skb)->frags[0].page_offset;
+ NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
+ }
+}
+EXPORT_SYMBOL(skb_gro_reset_offset);
+
int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
skb_gro_reset_offset(skb);
@@ -2520,16 +2553,10 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_reuse_skb);
-struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
- struct napi_gro_fraginfo *info)
+struct sk_buff *napi_get_frags(struct napi_struct *napi)
{
struct net_device *dev = napi->dev;
struct sk_buff *skb = napi->skb;
- struct ethhdr *eth;
- skb_frag_t *frag;
- int i;
-
- napi->skb = NULL;
if (!skb) {
skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
@@ -2537,47 +2564,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
goto out;
skb_reserve(skb, NET_IP_ALIGN);
- }
-
- BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
- frag = &info->frags[info->nr_frags - 1];
-
- for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
- skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
- frag->size);
- frag++;
- }
- skb_shinfo(skb)->nr_frags = info->nr_frags;
-
- skb->data_len = info->len;
- skb->len += info->len;
- skb->truesize += info->len;
- skb_reset_mac_header(skb);
- skb_gro_reset_offset(skb);
-
- eth = skb_gro_header(skb, sizeof(*eth));
- if (!eth) {
- napi_reuse_skb(napi, skb);
- skb = NULL;
- goto out;
+ napi->skb = skb;
}
- skb_gro_pull(skb, sizeof(*eth));
-
- /*
- * This works because the only protocols we care about don't require
- * special handling. We'll fix it up properly at the end.
- */
- skb->protocol = eth->h_proto;
-
- skb->ip_summed = info->ip_summed;
- skb->csum = info->csum;
-
out:
return skb;
}
-EXPORT_SYMBOL(napi_fraginfo_skb);
+EXPORT_SYMBOL(napi_get_frags);
int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
{
@@ -2607,9 +2601,46 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
}
EXPORT_SYMBOL(napi_frags_finish);
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
- struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+ struct sk_buff *skb = napi->skb;
+ struct ethhdr *eth;
+ unsigned int hlen;
+ unsigned int off;
+
+ napi->skb = NULL;
+
+ skb_reset_mac_header(skb);
+ skb_gro_reset_offset(skb);
+
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*eth);
+ eth = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ eth = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!eth)) {
+ napi_reuse_skb(napi, skb);
+ skb = NULL;
+ goto out;
+ }
+ }
+
+ skb_gro_pull(skb, sizeof(*eth));
+
+ /*
+ * This works because the only protocols we care about don't require
+ * special handling. We'll fix it up properly at the end.
+ */
+ skb->protocol = eth->h_proto;
+
+out:
+ return skb;
+}
+EXPORT_SYMBOL(napi_frags_skb);
+
+int napi_gro_frags(struct napi_struct *napi)
+{
+ struct sk_buff *skb = napi_frags_skb(napi);
if (!skb)
return NET_RX_DROP;
@@ -2713,7 +2744,7 @@ void netif_napi_del(struct napi_struct *napi)
struct sk_buff *skb, *next;
list_del_init(&napi->dev_list);
- kfree_skb(napi->skb);
+ napi_free_frags(napi);
for (skb = napi->gro_list; skb; skb = next) {
next = skb->next;
@@ -2767,8 +2798,10 @@ static void net_rx_action(struct softirq_action *h)
* accidently calling ->poll() when NAPI is not scheduled.
*/
work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state))
+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
+ trace_napi_poll(n);
+ }
WARN_ON_ONCE(work > weight);
@@ -3438,6 +3471,252 @@ void dev_set_rx_mode(struct net_device *dev)
netif_addr_unlock_bh(dev);
}
+/* hw addresses list handling functions */
+
+static int __hw_addr_add(struct list_head *list, unsigned char *addr,
+ int addr_len, unsigned char addr_type)
+{
+ struct netdev_hw_addr *ha;
+ int alloc_size;
+
+ if (addr_len > MAX_ADDR_LEN)
+ return -EINVAL;
+
+ alloc_size = sizeof(*ha);
+ if (alloc_size < L1_CACHE_BYTES)
+ alloc_size = L1_CACHE_BYTES;
+ ha = kmalloc(alloc_size, GFP_ATOMIC);
+ if (!ha)
+ return -ENOMEM;
+ memcpy(ha->addr, addr, addr_len);
+ ha->type = addr_type;
+ list_add_tail_rcu(&ha->list, list);
+ return 0;
+}
+
+static void ha_rcu_free(struct rcu_head *head)
+{
+ struct netdev_hw_addr *ha;
+
+ ha = container_of(head, struct netdev_hw_addr, rcu_head);
+ kfree(ha);
+}
+
+static int __hw_addr_del_ii(struct list_head *list, unsigned char *addr,
+ int addr_len, unsigned char addr_type,
+ int ignore_index)
+{
+ struct netdev_hw_addr *ha;
+ int i = 0;
+
+ list_for_each_entry(ha, list, list) {
+ if (i++ != ignore_index &&
+ !memcmp(ha->addr, addr, addr_len) &&
+ (ha->type == addr_type || !addr_type)) {
+ list_del_rcu(&ha->list);
+ call_rcu(&ha->rcu_head, ha_rcu_free);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int __hw_addr_add_multiple_ii(struct list_head *to_list,
+ struct list_head *from_list,
+ int addr_len, unsigned char addr_type,
+ int ignore_index)
+{
+ int err;
+ struct netdev_hw_addr *ha, *ha2;
+ unsigned char type;
+
+ list_for_each_entry(ha, from_list, list) {
+ type = addr_type ? addr_type : ha->type;
+ err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+ if (err)
+ goto unroll;
+ }
+ return 0;
+
+unroll:
+ list_for_each_entry(ha2, from_list, list) {
+ if (ha2 == ha)
+ break;
+ type = addr_type ? addr_type : ha2->type;
+ __hw_addr_del_ii(to_list, ha2->addr, addr_len, type,
+ ignore_index);
+ }
+ return err;
+}
+
+static void __hw_addr_del_multiple_ii(struct list_head *to_list,
+ struct list_head *from_list,
+ int addr_len, unsigned char addr_type,
+ int ignore_index)
+{
+ struct netdev_hw_addr *ha;
+ unsigned char type;
+
+ list_for_each_entry(ha, from_list, list) {
+ type = addr_type ? addr_type : ha->type;
+ __hw_addr_del_ii(to_list, ha->addr, addr_len, addr_type,
+ ignore_index);
+ }
+}
+
+static void __hw_addr_flush(struct list_head *list)
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, list, list) {
+ list_del_rcu(&ha->list);
+ call_rcu(&ha->rcu_head, ha_rcu_free);
+ }
+}
+
+/* Device addresses handling functions */
+
+static void dev_addr_flush(struct net_device *dev)
+{
+ /* rtnl_mutex must be held here */
+
+ __hw_addr_flush(&dev->dev_addr_list);
+ dev->dev_addr = NULL;
+}
+
+static int dev_addr_init(struct net_device *dev)
+{
+ unsigned char addr[MAX_ADDR_LEN];
+ struct netdev_hw_addr *ha;
+ int err;
+
+ /* rtnl_mutex must be held here */
+
+ INIT_LIST_HEAD(&dev->dev_addr_list);
+ memset(addr, 0, sizeof(*addr));
+ err = __hw_addr_add(&dev->dev_addr_list, addr, sizeof(*addr),
+ NETDEV_HW_ADDR_T_LAN);
+ if (!err) {
+ /*
+ * Get the first (previously created) address from the list
+ * and set dev_addr pointer to this location.
+ */
+ ha = list_first_entry(&dev->dev_addr_list,
+ struct netdev_hw_addr, list);
+ dev->dev_addr = ha->addr;
+ }
+ return err;
+}
+
+/**
+ * dev_addr_add - Add a device address
+ * @dev: device
+ * @addr: address to add
+ * @addr_type: address type
+ *
+ * Add a device address to the device or increase the reference count if
+ * it already exists.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+ unsigned char addr_type)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ err = __hw_addr_add(&dev->dev_addr_list, addr, dev->addr_len,
+ addr_type);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ * dev_addr_del - Release a device address.
+ * @dev: device
+ * @addr: address to delete
+ * @addr_type: address type
+ *
+ * Release reference to a device address and remove it from the device
+ * if the reference count drops to zero.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+ unsigned char addr_type)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ err = __hw_addr_del_ii(&dev->dev_addr_list, addr, dev->addr_len,
+ addr_type, 0);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ * dev_addr_add_multiple - Add device addresses from another device
+ * @to_dev: device to which addresses will be added
+ * @from_dev: device from which addresses will be added
+ * @addr_type: address type - 0 means type will be used from from_dev
+ *
+ * Add device addresses of the one device to another.
+ **
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+ struct net_device *from_dev,
+ unsigned char addr_type)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ if (from_dev->addr_len != to_dev->addr_len)
+ return -EINVAL;
+ err = __hw_addr_add_multiple_ii(&to_dev->dev_addr_list,
+ &from_dev->dev_addr_list,
+ to_dev->addr_len, addr_type, 0);
+ if (!err)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+ return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ * dev_addr_del_multiple - Delete device addresses by another device
+ * @to_dev: device where the addresses will be deleted
+ * @from_dev: device by which addresses the addresses will be deleted
+ * @addr_type: address type - 0 means type will used from from_dev
+ *
+ * Deletes addresses in to device by the list of addresses in from device.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+ struct net_device *from_dev,
+ unsigned char addr_type)
+{
+ ASSERT_RTNL();
+
+ if (from_dev->addr_len != to_dev->addr_len)
+ return -EINVAL;
+ __hw_addr_del_multiple_ii(&to_dev->dev_addr_list,
+ &from_dev->dev_addr_list,
+ to_dev->addr_len, addr_type, 0);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+ return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/* unicast and multicast addresses handling functions */
+
int __dev_addr_delete(struct dev_addr_list **list, int *count,
void *addr, int alen, int glbl)
{
@@ -4327,39 +4606,6 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
}
EXPORT_SYMBOL(netdev_fix_features);
-/* Some devices need to (re-)set their netdev_ops inside
- * ->init() or similar. If that happens, we have to setup
- * the compat pointers again.
- */
-void netdev_resync_ops(struct net_device *dev)
-{
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
- const struct net_device_ops *ops = dev->netdev_ops;
-
- dev->init = ops->ndo_init;
- dev->uninit = ops->ndo_uninit;
- dev->open = ops->ndo_open;
- dev->change_rx_flags = ops->ndo_change_rx_flags;
- dev->set_rx_mode = ops->ndo_set_rx_mode;
- dev->set_multicast_list = ops->ndo_set_multicast_list;
- dev->set_mac_address = ops->ndo_set_mac_address;
- dev->validate_addr = ops->ndo_validate_addr;
- dev->do_ioctl = ops->ndo_do_ioctl;
- dev->set_config = ops->ndo_set_config;
- dev->change_mtu = ops->ndo_change_mtu;
- dev->neigh_setup = ops->ndo_neigh_setup;
- dev->tx_timeout = ops->ndo_tx_timeout;
- dev->get_stats = ops->ndo_get_stats;
- dev->vlan_rx_register = ops->ndo_vlan_rx_register;
- dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
- dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
- dev->poll_controller = ops->ndo_poll_controller;
-#endif
-#endif
-}
-EXPORT_SYMBOL(netdev_resync_ops);
-
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -4399,23 +4645,6 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
- /* Netdevice_ops API compatiability support.
- * This is temporary until all network devices are converted.
- */
- if (dev->netdev_ops) {
- netdev_resync_ops(dev);
- } else {
- char drivername[64];
- pr_info("%s (%s): not using net_device_ops yet\n",
- dev->name, netdev_drivername(dev, drivername, 64));
-
- /* This works only because net_device_ops and the
- compatiablity structure are the same. */
- dev->netdev_ops = (void *) &(dev->init);
- }
-#endif
-
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -4701,13 +4930,30 @@ void netdev_run_todo(void)
* the internal statistics structure is used.
*/
const struct net_device_stats *dev_get_stats(struct net_device *dev)
- {
+{
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_get_stats)
return ops->ndo_get_stats(dev);
- else
- return &dev->stats;
+ else {
+ unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+ struct net_device_stats *stats = &dev->stats;
+ unsigned int i;
+ struct netdev_queue *txq;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ txq = netdev_get_tx_queue(dev, i);
+ tx_bytes += txq->tx_bytes;
+ tx_packets += txq->tx_packets;
+ tx_dropped += txq->tx_dropped;
+ }
+ if (tx_bytes || tx_packets || tx_dropped) {
+ stats->tx_bytes = tx_bytes;
+ stats->tx_packets = tx_packets;
+ stats->tx_dropped = tx_dropped;
+ }
+ return stats;
+ }
}
EXPORT_SYMBOL(dev_get_stats);
@@ -4765,13 +5011,16 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
if (!tx) {
printk(KERN_ERR "alloc_netdev: Unable to allocate "
"tx qdiscs.\n");
- kfree(p);
- return NULL;
+ goto free_p;
}
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
+
+ if (dev_addr_init(dev))
+ goto free_tx;
+
dev_net_set(dev, &init_net);
dev->_tx = tx;
@@ -4783,9 +5032,17 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
netdev_init_queues(dev);
INIT_LIST_HEAD(&dev->napi_list);
+ dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
strcpy(dev->name, name);
return dev;
+
+free_tx:
+ kfree(tx);
+
+free_p:
+ kfree(p);
+ return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mq);
@@ -4805,6 +5062,9 @@ void free_netdev(struct net_device *dev)
kfree(dev->_tx);
+ /* Flush device addresses */
+ dev_addr_flush(dev);
+
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);