summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c1255
-rw-r--r--net/core/dev_addr_lists.c18
-rw-r--r--net/core/drop_monitor.c14
-rw-r--r--net/core/dst.c93
-rw-r--r--net/core/ethtool.c825
-rw-r--r--net/core/fib_rules.c31
-rw-r--r--net/core/filter.c482
-rw-r--r--net/core/flow.c14
-rw-r--r--net/core/gen_estimator.c9
-rw-r--r--net/core/iovec.c20
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c14
-rw-r--r--net/core/net-sysfs.c487
-rw-r--r--net/core/net-sysfs.h4
-rw-r--r--net/core/net_namespace.c26
-rw-r--r--net/core/netpoll.c47
-rw-r--r--net/core/pktgen.c473
-rw-r--r--net/core/request_sock.c5
-rw-r--r--net/core/rtnetlink.c292
-rw-r--r--net/core/scm.c12
-rw-r--r--net/core/skbuff.c61
-rw-r--r--net/core/sock.c92
-rw-r--r--net/core/sysctl_net_core.c12
-rw-r--r--net/core/timestamping.c10
-rw-r--r--net/core/utils.c24
26 files changed, 2796 insertions, 1528 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cd1e039c8755..18ac112ea7ae 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
* interrupt level will suddenly eat the receive_queue.
*
* Look at current nfs client by the way...
- * However, this function was corrent in any case. 8)
+ * However, this function was correct in any case. 8)
*/
unsigned long cpu_flags;
diff --git a/net/core/dev.c b/net/core/dev.c
index 78b5a89b0f40..d94537914a71 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
#include "net-sysfs.h"
@@ -743,34 +744,32 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
EXPORT_SYMBOL(dev_get_by_index);
/**
- * dev_getbyhwaddr - find a device by its hardware address
+ * dev_getbyhwaddr_rcu - find a device by its hardware address
* @net: the applicable net namespace
* @type: media type of device
* @ha: hardware address
*
* Search for an interface by MAC address. Returns NULL if the device
- * is not found or a pointer to the device. The caller must hold the
- * rtnl semaphore. The returned device has not had its ref count increased
+ * is not found or a pointer to the device.
+ * The caller must hold RCU or RTNL.
+ * The returned device has not had its ref count increased
* and the caller must therefore be careful about locking
*
- * BUGS:
- * If the API was consistent this would be __dev_get_by_hwaddr
*/
-struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+ const char *ha)
{
struct net_device *dev;
- ASSERT_RTNL();
-
- for_each_netdev(net, dev)
+ for_each_netdev_rcu(net, dev)
if (dev->type == type &&
!memcmp(dev->dev_addr, ha, dev->addr_len))
return dev;
return NULL;
}
-EXPORT_SYMBOL(dev_getbyhwaddr);
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
@@ -949,7 +948,7 @@ int dev_alloc_name(struct net_device *dev, const char *name)
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
+static int dev_get_valid_name(struct net_device *dev, const char *name)
{
struct net *net;
@@ -959,7 +958,7 @@ static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt
if (!dev_valid_name(name))
return -EINVAL;
- if (fmt && strchr(name, '%'))
+ if (strchr(name, '%'))
return dev_alloc_name(dev, name);
else if (__dev_get_by_name(net, name))
return -EEXIST;
@@ -996,7 +995,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
- err = dev_get_valid_name(dev, newname, 1);
+ err = dev_get_valid_name(dev, newname);
if (err < 0)
return err;
@@ -1008,7 +1007,7 @@ rollback:
}
write_lock_bh(&dev_base_lock);
- hlist_del(&dev->name_hlist);
+ hlist_del_rcu(&dev->name_hlist);
write_unlock_bh(&dev_base_lock);
synchronize_rcu();
@@ -1116,13 +1115,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
void dev_load(struct net *net, const char *name)
{
struct net_device *dev;
+ int no_module;
rcu_read_lock();
dev = dev_get_by_name_rcu(net, name);
rcu_read_unlock();
- if (!dev && capable(CAP_NET_ADMIN))
- request_module("%s", name);
+ no_module = !dev;
+ if (no_module && capable(CAP_NET_ADMIN))
+ no_module = request_module("netdev-%s", name);
+ if (no_module && capable(CAP_SYS_MODULE)) {
+ if (!request_module("%s", name))
+ pr_err("Loading kernel module for a network device "
+"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
+"instead\n", name);
+ }
}
EXPORT_SYMBOL(dev_load);
@@ -1133,9 +1140,6 @@ static int __dev_open(struct net_device *dev)
ASSERT_RTNL();
- /*
- * Is it even present?
- */
if (!netif_device_present(dev))
return -ENODEV;
@@ -1144,9 +1148,6 @@ static int __dev_open(struct net_device *dev)
if (ret)
return ret;
- /*
- * Call device private open method
- */
set_bit(__LINK_STATE_START, &dev->state);
if (ops->ndo_validate_addr)
@@ -1155,31 +1156,12 @@ static int __dev_open(struct net_device *dev)
if (!ret && ops->ndo_open)
ret = ops->ndo_open(dev);
- /*
- * If it went open OK then:
- */
-
if (ret)
clear_bit(__LINK_STATE_START, &dev->state);
else {
- /*
- * Set the flags.
- */
dev->flags |= IFF_UP;
-
- /*
- * Enable NET_DMA
- */
net_dmaengine_get();
-
- /*
- * Initialize multicasting status
- */
dev_set_rx_mode(dev);
-
- /*
- * Wakeup transmit queue engine
- */
dev_activate(dev);
}
@@ -1202,22 +1184,13 @@ int dev_open(struct net_device *dev)
{
int ret;
- /*
- * Is it already up?
- */
if (dev->flags & IFF_UP)
return 0;
- /*
- * Open device
- */
ret = __dev_open(dev);
if (ret < 0)
return ret;
- /*
- * ... and announce new interface.
- */
rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
call_netdevice_notifiers(NETDEV_UP, dev);
@@ -1225,52 +1198,78 @@ int dev_open(struct net_device *dev)
}
EXPORT_SYMBOL(dev_open);
-static int __dev_close(struct net_device *dev)
+static int __dev_close_many(struct list_head *head)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ struct net_device *dev;
ASSERT_RTNL();
might_sleep();
- /*
- * Tell people we are going down, so that they can
- * prepare to death, when device is still operating.
- */
- call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+ list_for_each_entry(dev, head, unreg_list) {
+ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
- clear_bit(__LINK_STATE_START, &dev->state);
+ clear_bit(__LINK_STATE_START, &dev->state);
- /* Synchronize to scheduled poll. We cannot touch poll list,
- * it can be even on different cpu. So just clear netif_running().
- *
- * dev->stop() will invoke napi_disable() on all of it's
- * napi_struct instances on this device.
- */
- smp_mb__after_clear_bit(); /* Commit netif_running(). */
+ /* Synchronize to scheduled poll. We cannot touch poll list, it
+ * can be even on different cpu. So just clear netif_running().
+ *
+ * dev->stop() will invoke napi_disable() on all of it's
+ * napi_struct instances on this device.
+ */
+ smp_mb__after_clear_bit(); /* Commit netif_running(). */
+ }
- dev_deactivate(dev);
+ dev_deactivate_many(head);
- /*
- * Call the device specific close. This cannot fail.
- * Only if device is UP
- *
- * We allow it to be called even after a DETACH hot-plug
- * event.
- */
- if (ops->ndo_stop)
- ops->ndo_stop(dev);
+ list_for_each_entry(dev, head, unreg_list) {
+ const struct net_device_ops *ops = dev->netdev_ops;
- /*
- * Device is now down.
- */
+ /*
+ * Call the device specific close. This cannot fail.
+ * Only if device is UP
+ *
+ * We allow it to be called even after a DETACH hot-plug
+ * event.
+ */
+ if (ops->ndo_stop)
+ ops->ndo_stop(dev);
- dev->flags &= ~IFF_UP;
+ dev->flags &= ~IFF_UP;
+ net_dmaengine_put();
+ }
- /*
- * Shutdown NET_DMA
- */
- net_dmaengine_put();
+ return 0;
+}
+static int __dev_close(struct net_device *dev)
+{
+ int retval;
+ LIST_HEAD(single);
+
+ list_add(&dev->unreg_list, &single);
+ retval = __dev_close_many(&single);
+ list_del(&single);
+ return retval;
+}
+
+static int dev_close_many(struct list_head *head)
+{
+ struct net_device *dev, *tmp;
+ LIST_HEAD(tmp_list);
+
+ list_for_each_entry_safe(dev, tmp, head, unreg_list)
+ if (!(dev->flags & IFF_UP))
+ list_move(&dev->unreg_list, &tmp_list);
+
+ __dev_close_many(head);
+
+ list_for_each_entry(dev, head, unreg_list) {
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+ call_netdevice_notifiers(NETDEV_DOWN, dev);
+ }
+
+ /* rollback_registered_many needs the complete original list */
+ list_splice(&tmp_list, head);
return 0;
}
@@ -1285,17 +1284,13 @@ static int __dev_close(struct net_device *dev)
*/
int dev_close(struct net_device *dev)
{
- if (!(dev->flags & IFF_UP))
- return 0;
-
- __dev_close(dev);
-
- /*
- * Tell people we are down
- */
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
- call_netdevice_notifiers(NETDEV_DOWN, dev);
+ if (dev->flags & IFF_UP) {
+ LIST_HEAD(single);
+ list_add(&dev->unreg_list, &single);
+ dev_close_many(&single);
+ list_del(&single);
+ }
return 0;
}
EXPORT_SYMBOL(dev_close);
@@ -1311,26 +1306,25 @@ EXPORT_SYMBOL(dev_close);
*/
void dev_disable_lro(struct net_device *dev)
{
- if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
- dev->ethtool_ops->set_flags) {
- u32 flags = dev->ethtool_ops->get_flags(dev);
- if (flags & ETH_FLAG_LRO) {
- flags &= ~ETH_FLAG_LRO;
- dev->ethtool_ops->set_flags(dev, flags);
- }
- }
- WARN_ON(dev->features & NETIF_F_LRO);
+ u32 flags;
+
+ if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
+ flags = dev->ethtool_ops->get_flags(dev);
+ else
+ flags = ethtool_op_get_flags(dev);
+
+ if (!(flags & ETH_FLAG_LRO))
+ return;
+
+ __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
+ if (unlikely(dev->features & NETIF_F_LRO))
+ netdev_WARN(dev, "failed to disable LRO!\n");
}
EXPORT_SYMBOL(dev_disable_lro);
static int dev_boot_phase = 1;
-/*
- * Device change register/unregister. These are not inline or static
- * as we export them to the world.
- */
-
/**
* register_netdevice_notifier - register a network notifier block
* @nb: notifier
@@ -1432,6 +1426,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
ASSERT_RTNL();
return raw_notifier_call_chain(&netdev_chain, val, dev);
}
+EXPORT_SYMBOL(call_netdevice_notifiers);
/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);
@@ -1462,6 +1457,27 @@ static inline void net_timestamp_check(struct sk_buff *skb)
__net_timestamp(skb);
}
+static inline bool is_skb_forwardable(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ unsigned int len;
+
+ if (!(dev->flags & IFF_UP))
+ return false;
+
+ len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
+ if (skb->len <= len)
+ return true;
+
+ /* if TSO is enabled, we don't care about the length as the packet
+ * could be forwarded without being segmented before
+ */
+ if (skb_is_gso(skb))
+ return true;
+
+ return false;
+}
+
/**
* dev_forward_skb - loopback an skb to another netif
*
@@ -1485,8 +1501,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_orphan(skb);
nf_reset(skb);
- if (unlikely(!(dev->flags & IFF_UP) ||
- (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+ if (unlikely(!is_skb_forwardable(dev, skb))) {
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
@@ -1499,6 +1514,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
+static inline int deliver_skb(struct sk_buff *skb,
+ struct packet_type *pt_prev,
+ struct net_device *orig_dev)
+{
+ atomic_inc(&skb->users);
+ return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
/*
* Support routine. Sends outgoing frames to any network
* taps currently in use.
@@ -1507,13 +1530,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb);
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
-
-#ifdef CONFIG_NET_CLS_ACT
- if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
- net_timestamp_set(skb);
-#else
- net_timestamp_set(skb);
-#endif
+ struct sk_buff *skb2 = NULL;
+ struct packet_type *pt_prev = NULL;
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1523,10 +1541,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
if ((ptype->dev == dev || !ptype->dev) &&
(ptype->af_packet_priv == NULL ||
(struct sock *)ptype->af_packet_priv != skb->sk)) {
- struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (pt_prev) {
+ deliver_skb(skb2, pt_prev, skb->dev);
+ pt_prev = ptype;
+ continue;
+ }
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
break;
+ net_timestamp_set(skb2);
+
/* skb->nh should be correctly
set by sender, so that the second statement is
just protection against buggy protocols.
@@ -1545,24 +1571,79 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
skb2->transport_header = skb2->network_header;
skb2->pkt_type = PACKET_OUTGOING;
- ptype->func(skb2, skb->dev, ptype, skb->dev);
+ pt_prev = ptype;
}
}
+ if (pt_prev)
+ pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
}
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid nothing can be done so disable priority mappings. If is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+ int i;
+ struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+ /* If TC0 is invalidated disable TC mapping */
+ if (tc->offset + tc->count > txq) {
+ pr_warning("Number of in use tx queues changed "
+ "invalidating tc mappings. Priority "
+ "traffic classification disabled!\n");
+ dev->num_tc = 0;
+ return;
+ }
+
+ /* Invalidated prio to tc mappings set to TC0 */
+ for (i = 1; i < TC_BITMASK + 1; i++) {
+ int q = netdev_get_prio_tc_map(dev, i);
+
+ tc = &dev->tc_to_txq[q];
+ if (tc->offset + tc->count > txq) {
+ pr_warning("Number of in use tx queues "
+ "changed. Priority %i to tc "
+ "mapping %i is no longer valid "
+ "setting map to 0\n",
+ i, q);
+ netdev_set_prio_tc_map(dev, i, 0);
+ }
+ }
+}
+
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
* greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
+ int rc;
+
if (txq < 1 || txq > dev->num_tx_queues)
return -EINVAL;
- if (dev->reg_state == NETREG_REGISTERED) {
+ if (dev->reg_state == NETREG_REGISTERED ||
+ dev->reg_state == NETREG_UNREGISTERING) {
ASSERT_RTNL();
+ rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+ txq);
+ if (rc)
+ return rc;
+
+ if (dev->num_tc)
+ netif_setup_tc(dev, txq);
+
if (txq < dev->real_num_tx_queues)
qdisc_reset_all_tx_gt(dev, txq);
}
@@ -1683,37 +1764,6 @@ void netif_device_attach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_attach);
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
-{
- return ((features & NETIF_F_GEN_CSUM) ||
- ((features & NETIF_F_IP_CSUM) &&
- protocol == htons(ETH_P_IP)) ||
- ((features & NETIF_F_IPV6_CSUM) &&
- protocol == htons(ETH_P_IPV6)) ||
- ((features & NETIF_F_FCOE_CRC) &&
- protocol == htons(ETH_P_FCOE)));
-}
-
-static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
-{
- int features = dev->features;
-
- if (vlan_tx_tag_present(skb))
- features &= dev->vlan_features;
-
- if (can_checksum_protocol(features, skb->protocol))
- return true;
-
- if (skb->protocol == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- if (can_checksum_protocol(dev->features & dev->vlan_features,
- veh->h_vlan_encapsulated_proto))
- return true;
- }
-
- return false;
-}
-
/**
* skb_dev_set -- assign a new device to a buffer
* @skb: buffer for the new device
@@ -1761,7 +1811,7 @@ int skb_checksum_help(struct sk_buff *skb)
goto out_set_summed;
}
- offset = skb->csum_start - skb_headroom(skb);
+ offset = skb_checksum_start_offset(skb);
BUG_ON(offset >= skb_headlen(skb));
csum = skb_checksum(skb, offset, skb->len - offset, 0);
@@ -1793,21 +1843,23 @@ EXPORT_SYMBOL(skb_checksum_help);
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
*/
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
__be16 type = skb->protocol;
+ int vlan_depth = ETH_HLEN;
int err;
- if (type == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *veh;
+ while (type == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vh;
- if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+ if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
return ERR_PTR(-EINVAL);
- veh = (struct vlan_ethhdr *)skb->data;
- type = veh->h_vlan_encapsulated_proto;
+ vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+ type = vh->h_vlan_encapsulated_proto;
+ vlan_depth += VLAN_HLEN;
}
skb_reset_mac_header(skb);
@@ -1821,8 +1873,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
dev->ethtool_ops->get_drvinfo(dev, &info);
- WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
- "ip_summed=%d",
+ WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
info.driver, dev ? dev->features : 0L,
skb->sk ? skb->sk->sk_route_caps : 0L,
skb->len, skb->data_len, skb->ip_summed);
@@ -1925,16 +1976,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
/**
* dev_gso_segment - Perform emulated hardware segmentation on skb.
* @skb: buffer to segment
+ * @features: device features as applicable to this skb
*
* This function segments the given skb and stores the list of segments
* in skb->next.
*/
-static int dev_gso_segment(struct sk_buff *skb)
+static int dev_gso_segment(struct sk_buff *skb, int features)
{
- struct net_device *dev = skb->dev;
struct sk_buff *segs;
- int features = dev->features & ~(illegal_highdma(dev, skb) ?
- NETIF_F_SG : 0);
segs = skb_gso_segment(skb, features);
@@ -1971,6 +2020,53 @@ static inline void skb_orphan_try(struct sk_buff *skb)
}
}
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+ return ((features & NETIF_F_GEN_CSUM) ||
+ ((features & NETIF_F_V4_CSUM) &&
+ protocol == htons(ETH_P_IP)) ||
+ ((features & NETIF_F_V6_CSUM) &&
+ protocol == htons(ETH_P_IPV6)) ||
+ ((features & NETIF_F_FCOE_CRC) &&
+ protocol == htons(ETH_P_FCOE)));
+}
+
+static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
+{
+ if (!can_checksum_protocol(features, protocol)) {
+ features &= ~NETIF_F_ALL_CSUM;
+ features &= ~NETIF_F_SG;
+ } else if (illegal_highdma(skb->dev, skb)) {
+ features &= ~NETIF_F_SG;
+ }
+
+ return features;
+}
+
+u32 netif_skb_features(struct sk_buff *skb)
+{
+ __be16 protocol = skb->protocol;
+ u32 features = skb->dev->features;
+
+ if (protocol == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else if (!vlan_tx_tag_present(skb)) {
+ return harmonize_features(skb, protocol, features);
+ }
+
+ features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
+
+ if (protocol != htons(ETH_P_8021Q)) {
+ return harmonize_features(skb, protocol, features);
+ } else {
+ features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+ NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
+ return harmonize_features(skb, protocol, features);
+ }
+}
+EXPORT_SYMBOL(netif_skb_features);
+
/*
* Returns true if either:
* 1. skb has frag_list and the device doesn't support FRAGLIST, or
@@ -1979,17 +2075,13 @@ static inline void skb_orphan_try(struct sk_buff *skb)
* support DMA from it.
*/
static inline int skb_needs_linearize(struct sk_buff *skb,
- struct net_device *dev)
+ int features)
{
- int features = dev->features;
-
- if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
- features &= dev->vlan_features;
-
return skb_is_nonlinear(skb) &&
- ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
- (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
- illegal_highdma(dev, skb))));
+ ((skb_has_frag_list(skb) &&
+ !(features & NETIF_F_FRAGLIST)) ||
+ (skb_shinfo(skb)->nr_frags &&
+ !(features & NETIF_F_SG)));
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1999,20 +2091,24 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
int rc = NETDEV_TX_OK;
if (likely(!skb->next)) {
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(skb, dev);
+ u32 features;
/*
- * If device doesnt need skb->dst, release it right now while
+ * If device doesn't need skb->dst, release it right now while
* its hot in this cpu cache
*/
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(skb, dev);
+
skb_orphan_try(skb);
+ features = netif_skb_features(skb);
+
if (vlan_tx_tag_present(skb) &&
- !(dev->features & NETIF_F_HW_VLAN_TX)) {
+ !(features & NETIF_F_HW_VLAN_TX)) {
skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
if (unlikely(!skb))
goto out;
@@ -2020,13 +2116,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->vlan_tci = 0;
}
- if (netif_needs_gso(dev, skb)) {
- if (unlikely(dev_gso_segment(skb)))
+ if (netif_needs_gso(skb, features)) {
+ if (unlikely(dev_gso_segment(skb, features)))
goto out_kfree_skb;
if (skb->next)
goto gso;
} else {
- if (skb_needs_linearize(skb, dev) &&
+ if (skb_needs_linearize(skb, features) &&
__skb_linearize(skb))
goto out_kfree_skb;
@@ -2035,9 +2131,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
* checksumming here.
*/
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- skb_set_transport_header(skb, skb->csum_start -
- skb_headroom(skb));
- if (!dev_can_checksum(dev, skb) &&
+ skb_set_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ if (!(features & NETIF_F_ALL_CSUM) &&
skb_checksum_help(skb))
goto out_kfree_skb;
}
@@ -2058,7 +2154,7 @@ gso:
nskb->next = NULL;
/*
- * If device doesnt need nskb->dst, release it right now while
+ * If device doesn't need nskb->dst, release it right now while
* its hot in this cpu cache
*/
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
@@ -2089,26 +2185,39 @@ out:
static u32 hashrnd __read_mostly;
-u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+ unsigned int num_tx_queues)
{
u32 hash;
+ u16 qoffset = 0;
+ u16 qcount = num_tx_queues;
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
- while (unlikely(hash >= dev->real_num_tx_queues))
- hash -= dev->real_num_tx_queues;
+ while (unlikely(hash >= num_tx_queues))
+ hash -= num_tx_queues;
return hash;
}
+ if (dev->num_tc) {
+ u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+ qoffset = dev->tc_to_txq[tc].offset;
+ qcount = dev->tc_to_txq[tc].count;
+ }
+
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
hash = (__force u16) skb->protocol ^ skb->rxhash;
hash = jhash_1word(hash, hashrnd);
- return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+ return (u16) (((u64) hash * qcount) >> 32) + qoffset;
}
-EXPORT_SYMBOL(skb_tx_hash);
+EXPORT_SYMBOL(__skb_tx_hash);
static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
@@ -2123,26 +2232,70 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
return queue_index;
}
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ int queue_index = -1;
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ map = rcu_dereference(
+ dev_maps->cpu_map[raw_smp_processor_id()]);
+ if (map) {
+ if (map->len == 1)
+ queue_index = map->queues[0];
+ else {
+ u32 hash;
+ if (skb->sk && skb->sk->sk_hash)
+ hash = skb->sk->sk_hash;
+ else
+ hash = (__force u16) skb->protocol ^
+ skb->rxhash;
+ hash = jhash_1word(hash, hashrnd);
+ queue_index = map->queues[
+ ((u64)hash * map->len) >> 32];
+ }
+ if (unlikely(queue_index >= dev->real_num_tx_queues))
+ queue_index = -1;
+ }
+ }
+ rcu_read_unlock();
+
+ return queue_index;
+#else
+ return -1;
+#endif
+}
+
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
int queue_index;
const struct net_device_ops *ops = dev->netdev_ops;
- if (ops->ndo_select_queue) {
+ if (dev->real_num_tx_queues == 1)
+ queue_index = 0;
+ else if (ops->ndo_select_queue) {
queue_index = ops->ndo_select_queue(dev, skb);
queue_index = dev_cap_txqueue(dev, queue_index);
} else {
struct sock *sk = skb->sk;
queue_index = sk_tx_queue_get(sk);
- if (queue_index < 0) {
- queue_index = 0;
- if (dev->real_num_tx_queues > 1)
+ if (queue_index < 0 || skb->ooo_okay ||
+ queue_index >= dev->real_num_tx_queues) {
+ int old_index = queue_index;
+
+ queue_index = get_xps_queue(dev, skb);
+ if (queue_index < 0)
queue_index = skb_tx_hash(dev, skb);
- if (sk) {
- struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+ if (queue_index != old_index && sk) {
+ struct dst_entry *dst =
+ rcu_dereference_check(sk->sk_dst_cache, 1);
if (dst && skb_dst(skb) == dst)
sk_tx_queue_set(sk, queue_index);
@@ -2159,15 +2312,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
- bool contended = qdisc_is_running(q);
+ bool contended;
int rc;
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ qdisc_calculate_pkt_len(skb, q);
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
* This permits __QDISC_STATE_RUNNING owner to get the lock more often
* and dequeue packets faster.
*/
+ contended = qdisc_is_running(q);
if (unlikely(contended))
spin_lock(&q->busylock);
@@ -2184,7 +2340,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
*/
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
skb_dst_force(skb);
- __qdisc_update_bstats(q, skb->len);
+
+ qdisc_bstats_update(q, skb);
+
if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -2197,7 +2355,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
rc = NET_XMIT_SUCCESS;
} else {
skb_dst_force(skb);
- rc = qdisc_enqueue_root(skb, q);
+ rc = q->enqueue(skb, q) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -2213,7 +2371,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
}
static DEFINE_PER_CPU(int, xmit_recursion);
-#define RECURSION_LIMIT 3
+#define RECURSION_LIMIT 10
/**
* dev_queue_xmit - transmit a buffer
@@ -2347,8 +2505,8 @@ static inline void ____napi_schedule(struct softnet_data *sd,
__u32 __skb_get_rxhash(struct sk_buff *skb)
{
int nhoff, hash = 0, poff;
- struct ipv6hdr *ip6;
- struct iphdr *ip;
+ const struct ipv6hdr *ip6;
+ const struct iphdr *ip;
u8 ip_proto;
u32 addr1, addr2, ihl;
union {
@@ -2363,7 +2521,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
goto done;
- ip = (struct iphdr *) (skb->data + nhoff);
+ ip = (const struct iphdr *) (skb->data + nhoff);
if (ip->frag_off & htons(IP_MF | IP_OFFSET))
ip_proto = 0;
else
@@ -2376,7 +2534,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
goto done;
- ip6 = (struct ipv6hdr *) (skb->data + nhoff);
+ ip6 = (const struct ipv6hdr *) (skb->data + nhoff);
ip_proto = ip6->nexthdr;
addr1 = (__force u32) ip6->saddr.s6_addr32[3];
addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2413,9 +2571,57 @@ EXPORT_SYMBOL(__skb_get_rxhash);
#ifdef CONFIG_RPS
/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow *rflow, u16 next_cpu)
+{
+ u16 tcpu;
+
+ tcpu = rflow->cpu = next_cpu;
+ if (tcpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+ struct netdev_rx_queue *rxqueue;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_dev_flow *old_rflow;
+ u32 flow_id;
+ u16 rxq_index;
+ int rc;
+
+ /* Should we steer this flow to a different hardware queue? */
+ if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
+ !(dev->features & NETIF_F_NTUPLE))
+ goto out;
+ rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+ if (rxq_index == skb_get_rx_queue(skb))
+ goto out;
+
+ rxqueue = dev->_rx + rxq_index;
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ if (!flow_table)
+ goto out;
+ flow_id = skb->rxhash & flow_table->mask;
+ rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+ rxq_index, flow_id);
+ if (rc < 0)
+ goto out;
+ old_rflow = rflow;
+ rflow = &flow_table->flows[flow_id];
+ rflow->cpu = next_cpu;
+ rflow->filter = rc;
+ if (old_rflow->filter == rflow->filter)
+ old_rflow->filter = RPS_NO_FILTER;
+ out:
+#endif
+ rflow->last_qtail =
+ per_cpu(softnet_data, tcpu).input_queue_head;
+ }
+
+ return rflow;
+}
+
/*
* get_rps_cpu is called from netif_receive_skb and returns the target
* CPU from the RPS map of the receiving queue for a given skb.
@@ -2425,7 +2631,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow **rflowp)
{
struct netdev_rx_queue *rxqueue;
- struct rps_map *map = NULL;
+ struct rps_map *map;
struct rps_dev_flow_table *flow_table;
struct rps_sock_flow_table *sock_flow_table;
int cpu = -1;
@@ -2444,15 +2650,16 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
} else
rxqueue = dev->_rx;
- if (rxqueue->rps_map) {
- map = rcu_dereference(rxqueue->rps_map);
- if (map && map->len == 1) {
+ map = rcu_dereference(rxqueue->rps_map);
+ if (map) {
+ if (map->len == 1 &&
+ !rcu_dereference_raw(rxqueue->rps_flow_table)) {
tcpu = map->cpus[0];
if (cpu_online(tcpu))
cpu = tcpu;
goto done;
}
- } else if (!rxqueue->rps_flow_table) {
+ } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
goto done;
}
@@ -2486,12 +2693,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
- rflow->last_qtail)) >= 0)) {
- tcpu = rflow->cpu = next_cpu;
- if (tcpu != RPS_NO_CPU)
- rflow->last_qtail = per_cpu(softnet_data,
- tcpu).input_queue_head;
- }
+ rflow->last_qtail)) >= 0))
+ rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
cpu = tcpu;
@@ -2512,6 +2716,46 @@ done:
return cpu;
}
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+ u32 flow_id, u16 filter_id)
+{
+ struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_dev_flow *rflow;
+ bool expire = true;
+ int cpu;
+
+ rcu_read_lock();
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ if (flow_table && flow_id <= flow_table->mask) {
+ rflow = &flow_table->flows[flow_id];
+ cpu = ACCESS_ONCE(rflow->cpu);
+ if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+ ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+ rflow->last_qtail) <
+ (int)(10 * flow_table->mask)))
+ expire = false;
+ }
+ rcu_read_unlock();
+ return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
+
/* Called from hardirq (IPI) context */
static void rps_trigger_softirq(void *data)
{
@@ -2716,14 +2960,6 @@ static void net_tx_action(struct softirq_action *h)
}
}
-static inline int deliver_skb(struct sk_buff *skb,
- struct packet_type *pt_prev,
- struct net_device *orig_dev)
-{
- atomic_inc(&skb->users);
- return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-}
-
#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
(defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
/* This hook is defined here for ATM LANE */
@@ -2737,8 +2973,8 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
* when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
* a compare and 2 stores extra right now if we dont have it on
* but have CONFIG_NET_CLS_ACT
- * NOTE: This doesnt stop any functionality; if you dont have
- * the ingress scheduler, you just cant add policies on ingress.
+ * NOTE: This doesn't stop any functionality; if you dont have
+ * the ingress scheduler, you just can't add policies on ingress.
*
*/
static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
@@ -2807,6 +3043,8 @@ out:
* on a failure.
*
* The caller must hold the rtnl_mutex.
+ *
+ * For a general description of rx_handler, see enum rx_handler_result.
*/
int netdev_rx_handler_register(struct net_device *dev,
rx_handler_func_t *rx_handler,
@@ -2841,64 +3079,13 @@ void netdev_rx_handler_unregister(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
-static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
- struct net_device *master)
-{
- if (skb->pkt_type == PACKET_HOST) {
- u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
-
- memcpy(dest, master->dev_addr, ETH_ALEN);
- }
-}
-
-/* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
- * ARP on active-backup slaves with arp_validate enabled.
- */
-int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
-{
- struct net_device *dev = skb->dev;
-
- if (master->priv_flags & IFF_MASTER_ARPMON)
- dev->last_rx = jiffies;
-
- if ((master->priv_flags & IFF_MASTER_ALB) &&
- (master->priv_flags & IFF_BRIDGE_PORT)) {
- /* Do address unmangle. The local destination address
- * will be always the one master has. Provides the right
- * functionality in a bridge.
- */
- skb_bond_set_mac_by_master(skb, master);
- }
-
- if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
- if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
- skb->protocol == __cpu_to_be16(ETH_P_ARP))
- return 0;
-
- if (master->priv_flags & IFF_MASTER_ALB) {
- if (skb->pkt_type != PACKET_BROADCAST &&
- skb->pkt_type != PACKET_MULTICAST)
- return 0;
- }
- if (master->priv_flags & IFF_MASTER_8023AD &&
- skb->protocol == __cpu_to_be16(ETH_P_SLOW))
- return 0;
-
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(__skb_bond_should_drop);
-
static int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
struct net_device *orig_dev;
- struct net_device *master;
- struct net_device *null_or_orig;
- struct net_device *orig_or_bond;
+ struct net_device *null_or_dev;
+ bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
@@ -2913,28 +3100,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
if (!skb->skb_iif)
skb->skb_iif = skb->dev->ifindex;
-
- /*
- * bonding note: skbs received on inactive slaves should only
- * be delivered to pkt handlers that are exact matches. Also
- * the deliver_no_wcard flag will be set. If packet handlers
- * are sensitive to duplicate packets these skbs will need to
- * be dropped at the handler.
- */
- null_or_orig = NULL;
orig_dev = skb->dev;
- master = ACCESS_ONCE(orig_dev->master);
- if (skb->deliver_no_wcard)
- null_or_orig = orig_dev;
- else if (master) {
- if (skb_bond_should_drop(skb, master)) {
- skb->deliver_no_wcard = 1;
- null_or_orig = orig_dev; /* deliver only exact match */
- } else
- skb->dev = master;
- }
- __this_cpu_inc(softnet_data.processed);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
@@ -2943,6 +3110,16 @@ static int __netif_receive_skb(struct sk_buff *skb)
rcu_read_lock();
+another_round:
+
+ __this_cpu_inc(softnet_data.processed);
+
+ if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+ skb = vlan_untag(skb);
+ if (unlikely(!skb))
+ goto out;
+ }
+
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -2951,8 +3128,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
#endif
list_for_each_entry_rcu(ptype, &ptype_all, list) {
- if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
- ptype->dev == orig_dev) {
+ if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -2966,16 +3142,24 @@ static int __netif_receive_skb(struct sk_buff *skb)
ncls:
#endif
- /* Handle special case of bridge or macvlan */
rx_handler = rcu_dereference(skb->dev->rx_handler);
if (rx_handler) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
}
- skb = rx_handler(skb);
- if (!skb)
+ switch (rx_handler(&skb)) {
+ case RX_HANDLER_CONSUMED:
goto out;
+ case RX_HANDLER_ANOTHER:
+ goto another_round;
+ case RX_HANDLER_EXACT:
+ deliver_exact = true;
+ case RX_HANDLER_PASS:
+ break;
+ default:
+ BUG();
+ }
}
if (vlan_tx_tag_present(skb)) {
@@ -2983,31 +3167,22 @@ ncls:
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
}
- if (vlan_hwaccel_do_receive(&skb)) {
+ if (vlan_do_receive(&skb)) {
ret = __netif_receive_skb(skb);
goto out;
} else if (unlikely(!skb))
goto out;
}
- /*
- * Make sure frames received on VLAN interfaces stacked on
- * bonding interfaces still make their way to any base bonding
- * device that may have registered for a specific ptype. The
- * handler may have to adjust skb->dev and orig_dev.
- */
- orig_or_bond = orig_dev;
- if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
- (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
- orig_or_bond = vlan_dev_real_dev(skb->dev);
- }
+ /* deliver only exact match when indicated */
+ null_or_dev = deliver_exact ? skb->dev : NULL;
type = skb->protocol;
list_for_each_entry_rcu(ptype,
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type && (ptype->dev == null_or_orig ||
- ptype->dev == skb->dev || ptype->dev == orig_dev ||
- ptype->dev == orig_or_bond)) {
+ if (ptype->type == type &&
+ (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
+ ptype->dev == orig_dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
@@ -3315,6 +3490,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
__skb_pull(skb, skb_headlen(skb));
skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
skb->vlan_tci = 0;
+ skb->dev = napi->dev;
+ skb->skb_iif = 0;
napi->skb = skb;
}
@@ -3611,7 +3788,7 @@ static void net_rx_action(struct softirq_action *h)
* with netpoll's poll_napi(). Only the entity which
* obtains the lock and sees NAPI_STATE_SCHED set will
* actually make the ->poll() call. Therefore we avoid
- * accidently calling ->poll() when NAPI is not scheduled.
+ * accidentally calling ->poll() when NAPI is not scheduled.
*/
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
@@ -3802,12 +3979,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev = (v == SEQ_START_TOKEN) ?
- first_net_device(seq_file_net(seq)) :
- next_net_device((struct net_device *)v);
+ struct net_device *dev = v;
+
+ if (v == SEQ_START_TOKEN)
+ dev = first_net_device_rcu(seq_file_net(seq));
+ else
+ dev = next_net_device_rcu(dev);
++*pos;
- return rcu_dereference(dev);
+ return dev;
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4091,15 +4271,14 @@ static int __init dev_proc_init(void)
/**
- * netdev_set_master - set up master/slave pair
+ * netdev_set_master - set up master pointer
* @slave: slave device
* @master: new master device
*
* Changes the master device of the slave. Pass %NULL to break the
* bonding. The caller must hold the RTNL semaphore. On a failure
* a negative errno code is returned. On success the reference counts
- * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
- * function returns zero.
+ * are adjusted and the function returns zero.
*/
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
@@ -4119,6 +4298,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
synchronize_net();
dev_put(old);
}
+ return 0;
+}
+EXPORT_SYMBOL(netdev_set_master);
+
+/**
+ * netdev_set_bond_master - set up bonding master/slave pair
+ * @slave: slave device
+ * @master: new master device
+ *
+ * Changes the master device of the slave. Pass %NULL to break the
+ * bonding. The caller must hold the RTNL semaphore. On a failure
+ * a negative errno code is returned. On success %RTM_NEWLINK is sent
+ * to the routing socket and the function returns zero.
+ */
+int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
+{
+ int err;
+
+ ASSERT_RTNL();
+
+ err = netdev_set_master(slave, master);
+ if (err)
+ return err;
if (master)
slave->flags |= IFF_SLAVE;
else
@@ -4127,7 +4329,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
return 0;
}
-EXPORT_SYMBOL(netdev_set_master);
+EXPORT_SYMBOL(netdev_set_bond_master);
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
@@ -4296,6 +4498,30 @@ void dev_set_rx_mode(struct net_device *dev)
}
/**
+ * dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
+ * @dev: device
+ * @cmd: memory area for ethtool_ops::get_settings() result
+ *
+ * The cmd arg is initialized properly (cleared and
+ * ethtool_cmd::cmd field set to ETHTOOL_GSET).
+ *
+ * Return device's ethtool_ops::get_settings() result value or
+ * -EOPNOTSUPP when device doesn't expose
+ * ethtool_ops::get_settings() operation.
+ */
+int dev_ethtool_get_settings(struct net_device *dev,
+ struct ethtool_cmd *cmd)
+{
+ if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
+ return -EOPNOTSUPP;
+
+ memset(cmd, 0, sizeof(struct ethtool_cmd));
+ cmd->cmd = ETHTOOL_GSET;
+ return dev->ethtool_ops->get_settings(dev, cmd);
+}
+EXPORT_SYMBOL(dev_ethtool_get_settings);
+
+/**
* dev_get_flags - get flags reported to userspace
* @dev: device
*
@@ -4464,6 +4690,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
EXPORT_SYMBOL(dev_set_mtu);
/**
+ * dev_set_group - Change group this device belongs to
+ * @dev: device
+ * @new_group: group this device should belong to
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+ dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
+/**
* dev_set_mac_address - Change Media Access Control Address
* @dev: device
* @sa: new address
@@ -4548,7 +4785,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
* is never reached
*/
WARN_ON(1);
- err = -EINVAL;
+ err = -ENOTTY;
break;
}
@@ -4816,7 +5053,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
/* Set the per device memory buffer space.
* Not applicable in our case */
case SIOCSIFLINK:
- return -EINVAL;
+ return -ENOTTY;
/*
* Unknown or private ioctl.
@@ -4837,7 +5074,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
/* Take care of Wireless Extensions */
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
return wext_handle_ioctl(net, &ifr, cmd, arg);
- return -EINVAL;
+ return -ENOTTY;
}
}
@@ -4889,12 +5126,14 @@ static void rollback_registered_many(struct list_head *head)
list_del(&dev->unreg_list);
continue;
}
-
+ dev->dismantle = true;
BUG_ON(dev->reg_state != NETREG_REGISTERED);
+ }
- /* If device is running, close it first. */
- dev_close(dev);
+ /* If device is running, close it first. */
+ dev_close_many(head);
+ list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
unlist_netdevice(dev);
@@ -4949,40 +5188,62 @@ static void rollback_registered(struct net_device *dev)
list_add(&dev->unreg_list, &single);
rollback_registered_many(&single);
+ list_del(&single);
}
-unsigned long netdev_fix_features(unsigned long features, const char *name)
+u32 netdev_fix_features(struct net_device *dev, u32 features)
{
+ /* Fix illegal checksum combinations */
+ if ((features & NETIF_F_HW_CSUM) &&
+ (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+ netdev_warn(dev, "mixed HW and IP checksum settings.\n");
+ features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+ }
+
+ if ((features & NETIF_F_NO_CSUM) &&
+ (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+ netdev_warn(dev, "mixed no checksumming and other settings.\n");
+ features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+ }
+
/* Fix illegal SG+CSUM combinations. */
if ((features & NETIF_F_SG) &&
!(features & NETIF_F_ALL_CSUM)) {
- if (name)
- printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
- "checksum feature.\n", name);
+ netdev_dbg(dev,
+ "Dropping NETIF_F_SG since no checksum feature.\n");
features &= ~NETIF_F_SG;
}
/* TSO requires that SG is present as well. */
- if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
- if (name)
- printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
- "SG feature.\n", name);
- features &= ~NETIF_F_TSO;
+ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
+ netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
+ features &= ~NETIF_F_ALL_TSO;
+ }
+
+ /* TSO ECN requires that TSO is present as well. */
+ if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
+ features &= ~NETIF_F_TSO_ECN;
+
+ /* Software GSO depends on SG. */
+ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
+ netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
+ features &= ~NETIF_F_GSO;
}
+ /* UFO needs SG and checksumming */
if (features & NETIF_F_UFO) {
- if (!(features & NETIF_F_GEN_CSUM)) {
- if (name)
- printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
- "since no NETIF_F_HW_CSUM feature.\n",
- name);
+ /* maybe split UFO into V4 and V6? */
+ if (!((features & NETIF_F_GEN_CSUM) ||
+ (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+ == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+ netdev_dbg(dev,
+ "Dropping NETIF_F_UFO since no checksum offload features.\n");
features &= ~NETIF_F_UFO;
}
if (!(features & NETIF_F_SG)) {
- if (name)
- printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
- "since no NETIF_F_SG feature.\n", name);
+ netdev_dbg(dev,
+ "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
features &= ~NETIF_F_UFO;
}
}
@@ -4991,6 +5252,75 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
}
EXPORT_SYMBOL(netdev_fix_features);
+int __netdev_update_features(struct net_device *dev)
+{
+ u32 features;
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ features = netdev_get_wanted_features(dev);
+
+ if (dev->netdev_ops->ndo_fix_features)
+ features = dev->netdev_ops->ndo_fix_features(dev, features);
+
+ /* driver might be less strict about feature dependencies */
+ features = netdev_fix_features(dev, features);
+
+ if (dev->features == features)
+ return 0;
+
+ netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
+ dev->features, features);
+
+ if (dev->netdev_ops->ndo_set_features)
+ err = dev->netdev_ops->ndo_set_features(dev, features);
+
+ if (unlikely(err < 0)) {
+ netdev_err(dev,
+ "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
+ err, features, dev->features);
+ return -1;
+ }
+
+ if (!err)
+ dev->features = features;
+
+ return 1;
+}
+
+/**
+ * netdev_update_features - recalculate device features
+ * @dev: the device to check
+ *
+ * Recalculate dev->features set and send notifications if it
+ * has changed. Should be called after driver or hardware dependent
+ * conditions might have changed that influence the features.
+ */
+void netdev_update_features(struct net_device *dev)
+{
+ if (__netdev_update_features(dev))
+ netdev_features_change(dev);
+}
+EXPORT_SYMBOL(netdev_update_features);
+
+/**
+ * netdev_change_features - recalculate device features
+ * @dev: the device to check
+ *
+ * Recalculate dev->features set and send notifications even
+ * if they have not changed. Should be called instead of
+ * netdev_update_features() if also dev->vlan_features might
+ * have changed to allow the changes to be propagated to stacked
+ * VLAN devices.
+ */
+void netdev_change_features(struct net_device *dev)
+{
+ __netdev_update_features(dev);
+ netdev_features_change(dev);
+}
+EXPORT_SYMBOL(netdev_change_features);
+
/**
* netif_stacked_transfer_operstate - transfer operstate
* @rootdev: the root or lower level device to transfer state from
@@ -5018,9 +5348,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+#ifdef CONFIG_RPS
static int netif_alloc_rx_queues(struct net_device *dev)
{
-#ifdef CONFIG_RPS
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
@@ -5033,15 +5363,22 @@ static int netif_alloc_rx_queues(struct net_device *dev)
}
dev->_rx = rx;
- /*
- * Set a pointer to first element in the array which holds the
- * reference count.
- */
for (i = 0; i < count; i++)
- rx[i].first = rx;
-#endif
+ rx[i].dev = dev;
return 0;
}
+#endif
+
+static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue, void *_unused)
+{
+ /* Initialize queue lock */
+ spin_lock_init(&queue->_xmit_lock);
+ netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+ queue->xmit_lock_owner = -1;
+ netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
+ queue->dev = dev;
+}
static int netif_alloc_netdev_queues(struct net_device *dev)
{
@@ -5057,25 +5394,11 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
return -ENOMEM;
}
dev->_tx = tx;
- return 0;
-}
-static void netdev_init_one_queue(struct net_device *dev,
- struct netdev_queue *queue,
- void *_unused)
-{
- queue->dev = dev;
-
- /* Initialize queue lock */
- spin_lock_init(&queue->_xmit_lock);
- netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
- queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queues(struct net_device *dev)
-{
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
spin_lock_init(&dev->tx_global_lock);
+
+ return 0;
}
/**
@@ -5114,16 +5437,10 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
- ret = netif_alloc_rx_queues(dev);
- if (ret)
- goto out;
-
- ret = netif_alloc_netdev_queues(dev);
- if (ret)
+ ret = dev_get_valid_name(dev, dev->name);
+ if (ret < 0)
goto out;
- netdev_init_queues(dev);
-
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -5134,35 +5451,25 @@ int register_netdevice(struct net_device *dev)
}
}
- ret = dev_get_valid_name(dev, dev->name, 0);
- if (ret)
- goto err_uninit;
-
dev->ifindex = dev_new_index(net);
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
- /* Fix illegal checksum combinations */
- if ((dev->features & NETIF_F_HW_CSUM) &&
- (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
- dev->name);
- dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
- }
+ /* Transfer changeable features to wanted_features and enable
+ * software offloads (GSO and GRO).
+ */
+ dev->hw_features |= NETIF_F_SOFT_FEATURES;
+ dev->features |= NETIF_F_SOFT_FEATURES;
+ dev->wanted_features = dev->features & dev->hw_features;
- if ((dev->features & NETIF_F_NO_CSUM) &&
- (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
- dev->name);
- dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+ /* Turn on no cache copy if HW is doing checksum */
+ dev->hw_features |= NETIF_F_NOCACHE_COPY;
+ if ((dev->features & NETIF_F_ALL_CSUM) &&
+ !(dev->features & NETIF_F_NO_CSUM)) {
+ dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+ dev->features |= NETIF_F_NOCACHE_COPY;
}
- dev->features = netdev_fix_features(dev->features, dev->name);
-
- /* Enable software GSO if SG is supported. */
- if (dev->features & NETIF_F_SG)
- dev->features |= NETIF_F_GSO;
-
/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
* vlan_dev_init() will do the dev->features check, so these features
* are enabled only if supported by underlying device.
@@ -5179,6 +5486,8 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
dev->reg_state = NETREG_REGISTERED;
+ __netdev_update_features(dev);
+
/*
* Default initial state at registry is that the
* device is present.
@@ -5274,19 +5583,7 @@ int register_netdev(struct net_device *dev)
int err;
rtnl_lock();
-
- /*
- * If the name is a format string the caller wants us to do a
- * name allocation.
- */
- if (strchr(dev->name, '%')) {
- err = dev_alloc_name(dev, dev->name);
- if (err < 0)
- goto out;
- }
-
err = register_netdevice(dev);
-out:
rtnl_unlock();
return err;
}
@@ -5416,7 +5713,7 @@ void netdev_run_todo(void)
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
WARN_ON(rcu_dereference_raw(dev->ip_ptr));
- WARN_ON(dev->ip6_ptr);
+ WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
if (dev->destructor)
@@ -5427,34 +5724,6 @@ void netdev_run_todo(void)
}
}
-/**
- * dev_txq_stats_fold - fold tx_queues stats
- * @dev: device to get statistics from
- * @stats: struct rtnl_link_stats64 to hold results
- */
-void dev_txq_stats_fold(const struct net_device *dev,
- struct rtnl_link_stats64 *stats)
-{
- u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
- unsigned int i;
- struct netdev_queue *txq;
-
- for (i = 0; i < dev->num_tx_queues; i++) {
- txq = netdev_get_tx_queue(dev, i);
- spin_lock_bh(&txq->_xmit_lock);
- tx_bytes += txq->tx_bytes;
- tx_packets += txq->tx_packets;
- tx_dropped += txq->tx_dropped;
- spin_unlock_bh(&txq->_xmit_lock);
- }
- if (tx_bytes || tx_packets || tx_dropped) {
- stats->tx_bytes = tx_bytes;
- stats->tx_packets = tx_packets;
- stats->tx_dropped = tx_dropped;
- }
-}
-EXPORT_SYMBOL(dev_txq_stats_fold);
-
/* Convert net_device_stats to rtnl_link_stats64. They have the same
* fields in the same order, with only the type differing.
*/
@@ -5498,7 +5767,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
} else {
netdev_stats_to_stats64(storage, &dev->stats);
- dev_txq_stats_fold(dev, storage);
}
storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
return storage;
@@ -5524,18 +5792,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
}
/**
- * alloc_netdev_mq - allocate network device
+ * alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
* @name: device name format string
* @setup: callback to initialize device
- * @queue_count: the number of subqueues to allocate
+ * @txqs: the number of TX subqueues to allocate
+ * @rxqs: the number of RX subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subquue structs
- * for each queue on the device at the end of the netdevice.
+ * for each queue on the device.
*/
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
- void (*setup)(struct net_device *), unsigned int queue_count)
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+ void (*setup)(struct net_device *),
+ unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
size_t alloc_size;
@@ -5543,12 +5813,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
BUG_ON(strlen(name) >= sizeof(dev->name));
- if (queue_count < 1) {
+ if (txqs < 1) {
pr_err("alloc_netdev: Unable to allocate device "
"with zero queues.\n");
return NULL;
}
+#ifdef CONFIG_RPS
+ if (rxqs < 1) {
+ pr_err("alloc_netdev: Unable to allocate device "
+ "with zero RX queues.\n");
+ return NULL;
+ }
+#endif
+
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
@@ -5579,14 +5857,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- dev->num_tx_queues = queue_count;
- dev->real_num_tx_queues = queue_count;
-
-#ifdef CONFIG_RPS
- dev->num_rx_queues = queue_count;
- dev->real_num_rx_queues = queue_count;
-#endif
-
dev->gso_max_size = GSO_MAX_SIZE;
INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
@@ -5596,16 +5866,39 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->link_watch_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
+
+ dev->num_tx_queues = txqs;
+ dev->real_num_tx_queues = txqs;
+ if (netif_alloc_netdev_queues(dev))
+ goto free_all;
+
+#ifdef CONFIG_RPS
+ dev->num_rx_queues = rxqs;
+ dev->real_num_rx_queues = rxqs;
+ if (netif_alloc_rx_queues(dev))
+ goto free_all;
+#endif
+
strcpy(dev->name, name);
+ dev->group = INIT_NETDEV_GROUP;
return dev;
+free_all:
+ free_netdev(dev);
+ return NULL;
+
free_pcpu:
free_percpu(dev->pcpu_refcnt);
+ kfree(dev->_tx);
+#ifdef CONFIG_RPS
+ kfree(dev->_rx);
+#endif
+
free_p:
kfree(p);
return NULL;
}
-EXPORT_SYMBOL(alloc_netdev_mq);
+EXPORT_SYMBOL(alloc_netdev_mqs);
/**
* free_netdev - free network device
@@ -5622,6 +5915,9 @@ void free_netdev(struct net_device *dev)
release_net(dev_net(dev));
kfree(dev->_tx);
+#ifdef CONFIG_RPS
+ kfree(dev->_rx);
+#endif
kfree(rcu_dereference_raw(dev->ingress_queue));
@@ -5769,7 +6065,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- if (dev_get_valid_name(dev, pat, 1))
+ if (dev_get_valid_name(dev, pat) < 0)
goto out;
}
@@ -5899,32 +6195,22 @@ static int dev_cpu_callback(struct notifier_block *nfb,
* @one to the master device with current feature set @all. Will not
* enable anything that is off in @mask. Returns the new feature set.
*/
-unsigned long netdev_increment_features(unsigned long all, unsigned long one,
- unsigned long mask)
+u32 netdev_increment_features(u32 all, u32 one, u32 mask)
{
- /* If device needs checksumming, downgrade to it. */
- if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
- all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
- else if (mask & NETIF_F_ALL_CSUM) {
- /* If one device supports v4/v6 checksumming, set for all. */
- if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
- !(all & NETIF_F_GEN_CSUM)) {
- all &= ~NETIF_F_ALL_CSUM;
- all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
- }
+ if (mask & NETIF_F_GEN_CSUM)
+ mask |= NETIF_F_ALL_CSUM;
+ mask |= NETIF_F_VLAN_CHALLENGED;
- /* If one device supports hw checksumming, set for all. */
- if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
- all &= ~NETIF_F_ALL_CSUM;
- all |= NETIF_F_HW_CSUM;
- }
- }
+ all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
+ all &= one | ~NETIF_F_ALL_FOR_ALL;
- one |= NETIF_F_ALL_CSUM;
+ /* If device needs checksumming, downgrade to it. */
+ if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
+ all &= ~NETIF_F_NO_CSUM;
- one |= all & NETIF_F_ONE_FOR_ALL;
- all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
- all |= one & mask & NETIF_F_ONE_FOR_ALL;
+ /* If one device supports hw checksumming, set for all. */
+ if (all & NETIF_F_GEN_CSUM)
+ all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
return all;
}
@@ -6085,7 +6371,7 @@ static void __net_exit default_device_exit(struct net *net)
if (dev->rtnl_link_ops)
continue;
- /* Push remaing network devices to init_net */
+ /* Push remaining network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
@@ -6100,7 +6386,7 @@ static void __net_exit default_device_exit(struct net *net)
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
/* At exit all network devices most be removed from a network
- * namespace. Do this in the reverse order of registeration.
+ * namespace. Do this in the reverse order of registration.
* Do this across as many network namespaces as possible to
* improve batching efficiency.
*/
@@ -6118,6 +6404,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
}
unregister_netdevice_many(&dev_kill_list);
+ list_del(&dev_kill_list);
rtnl_unlock();
}
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 508f9c18992f..e2e66939ed00 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -68,14 +68,6 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
}
-static void ha_rcu_free(struct rcu_head *head)
-{
- struct netdev_hw_addr *ha;
-
- ha = container_of(head, struct netdev_hw_addr, rcu_head);
- kfree(ha);
-}
-
static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
unsigned char *addr, int addr_len,
unsigned char addr_type, bool global)
@@ -94,7 +86,7 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
if (--ha->refcount)
return 0;
list_del_rcu(&ha->list);
- call_rcu(&ha->rcu_head, ha_rcu_free);
+ kfree_rcu(ha, rcu_head);
list->count--;
return 0;
}
@@ -144,7 +136,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
list_for_each_entry(ha, &from_list->list, list) {
type = addr_type ? addr_type : ha->type;
- __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
+ __hw_addr_del(to_list, ha->addr, addr_len, type);
}
}
EXPORT_SYMBOL(__hw_addr_del_multiple);
@@ -197,7 +189,7 @@ void __hw_addr_flush(struct netdev_hw_addr_list *list)
list_for_each_entry_safe(ha, tmp, &list->list, list) {
list_del_rcu(&ha->list);
- call_rcu(&ha->rcu_head, ha_rcu_free);
+ kfree_rcu(ha, rcu_head);
}
list->count = 0;
}
@@ -357,8 +349,8 @@ EXPORT_SYMBOL(dev_addr_add_multiple);
/**
* dev_addr_del_multiple - Delete device addresses by another device
* @to_dev: device where the addresses will be deleted
- * @from_dev: device by which addresses the addresses will be deleted
- * @addr_type: address type - 0 means type will used from from_dev
+ * @from_dev: device supplying the addresses to be deleted
+ * @addr_type: address type - 0 means type will be used from from_dev
*
* Deletes addresses in to device by the list of addresses in from device.
*
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 36e603c78ce9..7f36b38e060f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -207,14 +207,6 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
rcu_read_unlock();
}
-
-static void free_dm_hw_stat(struct rcu_head *head)
-{
- struct dm_hw_stat_delta *n;
- n = container_of(head, struct dm_hw_stat_delta, rcu);
- kfree(n);
-}
-
static int set_all_monitor_traces(int state)
{
int rc = 0;
@@ -245,7 +237,7 @@ static int set_all_monitor_traces(int state)
list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
if (new_stat->dev == NULL) {
list_del_rcu(&new_stat->list);
- call_rcu(&new_stat->rcu, free_dm_hw_stat);
+ kfree_rcu(new_stat, rcu);
}
}
break;
@@ -314,7 +306,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
new_stat->dev = NULL;
if (trace_state == TRACE_OFF) {
list_del_rcu(&new_stat->list);
- call_rcu(&new_stat->rcu, free_dm_hw_stat);
+ kfree_rcu(new_stat, rcu);
break;
}
}
@@ -350,7 +342,7 @@ static int __init init_net_drop_monitor(void)
struct per_cpu_dm_data *data;
int cpu, rc;
- printk(KERN_INFO "Initalizing network drop monitor service\n");
+ printk(KERN_INFO "Initializing network drop monitor service\n");
if (sizeof(void *) > 8) {
printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f1..81a4fa1c95ed 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -19,6 +19,7 @@
#include <linux/types.h>
#include <net/net_namespace.h>
#include <linux/sched.h>
+#include <linux/prefetch.h>
#include <net/dst.h>
@@ -33,9 +34,6 @@
* 3) This list is guarded by a mutex,
* so that the gc_task and dst_dev_event() can be synchronized.
*/
-#if RT_CACHE_DEBUG >= 2
-static atomic_t dst_total = ATOMIC_INIT(0);
-#endif
/*
* We want to keep lock & list close together
@@ -69,10 +67,6 @@ static void dst_gc_task(struct work_struct *work)
unsigned long expires = ~0L;
struct dst_entry *dst, *next, head;
struct dst_entry *last = &head;
-#if RT_CACHE_DEBUG >= 2
- ktime_t time_start = ktime_get();
- struct timespec elapsed;
-#endif
mutex_lock(&dst_gc_mutex);
next = dst_busy_list;
@@ -146,15 +140,6 @@ loop:
spin_unlock_bh(&dst_garbage.lock);
mutex_unlock(&dst_gc_mutex);
-#if RT_CACHE_DEBUG >= 2
- elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start));
- printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d"
- " expires: %lu elapsed: %lu us\n",
- atomic_read(&dst_total), delayed, work_performed,
- expires,
- elapsed.tv_sec * USEC_PER_SEC +
- elapsed.tv_nsec / NSEC_PER_USEC);
-#endif
}
int dst_discard(struct sk_buff *skb)
@@ -164,7 +149,10 @@ int dst_discard(struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard);
-void *dst_alloc(struct dst_ops *ops)
+const u32 dst_default_metrics[RTAX_MAX];
+
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
+ int initial_ref, int initial_obsolete, int flags)
{
struct dst_entry *dst;
@@ -172,17 +160,36 @@ void *dst_alloc(struct dst_ops *ops)
if (ops->gc(ops))
return NULL;
}
- dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
+ dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
if (!dst)
return NULL;
- atomic_set(&dst->__refcnt, 0);
+ dst->child = NULL;
+ dst->dev = dev;
+ if (dev)
+ dev_hold(dev);
dst->ops = ops;
- dst->lastuse = jiffies;
+ dst_init_metrics(dst, dst_default_metrics, true);
+ dst->expires = 0UL;
dst->path = dst;
- dst->input = dst->output = dst_discard;
-#if RT_CACHE_DEBUG >= 2
- atomic_inc(&dst_total);
+ dst->neighbour = NULL;
+ dst->hh = NULL;
+#ifdef CONFIG_XFRM
+ dst->xfrm = NULL;
#endif
+ dst->input = dst_discard;
+ dst->output = dst_discard;
+ dst->error = 0;
+ dst->obsolete = initial_obsolete;
+ dst->header_len = 0;
+ dst->trailer_len = 0;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ dst->tclassid = 0;
+#endif
+ atomic_set(&dst->__refcnt, initial_ref);
+ dst->__use = 0;
+ dst->lastuse = jiffies;
+ dst->flags = flags;
+ dst->next = NULL;
dst_entries_add(ops, 1);
return dst;
}
@@ -242,9 +249,6 @@ again:
dst->ops->destroy(dst);
if (dst->dev)
dev_put(dst->dev);
-#if RT_CACHE_DEBUG >= 2
- atomic_dec(&dst_total);
-#endif
kmem_cache_free(dst->ops->kmem_cachep, dst);
dst = child;
@@ -282,6 +286,42 @@ void dst_release(struct dst_entry *dst)
}
EXPORT_SYMBOL(dst_release);
+u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+ u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+
+ if (p) {
+ u32 *old_p = __DST_METRICS_PTR(old);
+ unsigned long prev, new;
+
+ memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+ new = (unsigned long) p;
+ prev = cmpxchg(&dst->_metrics, old, new);
+
+ if (prev != old) {
+ kfree(p);
+ p = __DST_METRICS_PTR(prev);
+ if (prev & DST_METRICS_READ_ONLY)
+ p = NULL;
+ }
+ }
+ return p;
+}
+EXPORT_SYMBOL(dst_cow_metrics_generic);
+
+/* Caller asserts that dst_metrics_read_only(dst) is false. */
+void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+ unsigned long prev, new;
+
+ new = (unsigned long) dst_default_metrics;
+ prev = cmpxchg(&dst->_metrics, old, new);
+ if (prev == old)
+ kfree(__DST_METRICS_PTR(old));
+}
+EXPORT_SYMBOL(__dst_destroy_metrics_generic);
+
/**
* skb_dst_set_noref - sets skb dst, without a reference
* @skb: buffer
@@ -370,6 +410,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
static struct notifier_block dst_dev_notifier = {
.notifier_call = dst_dev_event,
+ .priority = -10, /* must be called after other network notifiers */
};
void __init dst_init(void)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 956a9f4971cb..84e7304532e6 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -21,6 +21,8 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <linux/sched.h>
/*
* Some useful ethtool_ops methods that're device independent.
@@ -34,12 +36,6 @@ u32 ethtool_op_get_link(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_link);
-u32 ethtool_op_get_rx_csum(struct net_device *dev)
-{
- return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_rx_csum);
-
u32 ethtool_op_get_tx_csum(struct net_device *dev)
{
return (dev->features & NETIF_F_ALL_CSUM) != 0;
@@ -55,6 +51,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
{
@@ -146,9 +143,24 @@ u32 ethtool_op_get_flags(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_flags);
+/* Check if device can enable (or disable) particular feature coded in "data"
+ * argument. Flags "supported" describe features that can be toggled by device.
+ * If feature can not be toggled, it state (enabled or disabled) must match
+ * hardcoded device features state, otherwise flags are marked as invalid.
+ */
+bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
+{
+ u32 features = dev->features & flags_dup_features;
+ /* "data" can contain only flags_dup_features bits,
+ * see __ethtool_set_flags */
+
+ return (features & ~supported) != (data & ~supported);
+}
+EXPORT_SYMBOL(ethtool_invalid_flags);
+
int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
{
- if (data & ~supported)
+ if (ethtool_invalid_flags(dev, data, supported))
return -EINVAL;
dev->features = ((dev->features & ~flags_dup_features) |
@@ -171,6 +183,381 @@ EXPORT_SYMBOL(ethtool_ntuple_flush);
/* Handlers for each ethtool command */
+#define ETHTOOL_DEV_FEATURE_WORDS 1
+
+static void ethtool_get_features_compat(struct net_device *dev,
+ struct ethtool_get_features_block *features)
+{
+ if (!dev->ethtool_ops)
+ return;
+
+ /* getting RX checksum */
+ if (dev->ethtool_ops->get_rx_csum)
+ if (dev->ethtool_ops->get_rx_csum(dev))
+ features[0].active |= NETIF_F_RXCSUM;
+
+ /* mark legacy-changeable features */
+ if (dev->ethtool_ops->set_sg)
+ features[0].available |= NETIF_F_SG;
+ if (dev->ethtool_ops->set_tx_csum)
+ features[0].available |= NETIF_F_ALL_CSUM;
+ if (dev->ethtool_ops->set_tso)
+ features[0].available |= NETIF_F_ALL_TSO;
+ if (dev->ethtool_ops->set_rx_csum)
+ features[0].available |= NETIF_F_RXCSUM;
+ if (dev->ethtool_ops->set_flags)
+ features[0].available |= flags_dup_features;
+}
+
+static int ethtool_set_feature_compat(struct net_device *dev,
+ int (*legacy_set)(struct net_device *, u32),
+ struct ethtool_set_features_block *features, u32 mask)
+{
+ u32 do_set;
+
+ if (!legacy_set)
+ return 0;
+
+ if (!(features[0].valid & mask))
+ return 0;
+
+ features[0].valid &= ~mask;
+
+ do_set = !!(features[0].requested & mask);
+
+ if (legacy_set(dev, do_set) < 0)
+ netdev_info(dev,
+ "Legacy feature change (%s) failed for 0x%08x\n",
+ do_set ? "set" : "clear", mask);
+
+ return 1;
+}
+
+static int ethtool_set_features_compat(struct net_device *dev,
+ struct ethtool_set_features_block *features)
+{
+ int compat;
+
+ if (!dev->ethtool_ops)
+ return 0;
+
+ compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
+ features, NETIF_F_SG);
+ compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
+ features, NETIF_F_ALL_CSUM);
+ compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
+ features, NETIF_F_ALL_TSO);
+ compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
+ features, NETIF_F_RXCSUM);
+ compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags,
+ features, flags_dup_features);
+
+ return compat;
+}
+
+static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_gfeatures cmd = {
+ .cmd = ETHTOOL_GFEATURES,
+ .size = ETHTOOL_DEV_FEATURE_WORDS,
+ };
+ struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
+ {
+ .available = dev->hw_features,
+ .requested = dev->wanted_features,
+ .active = dev->features,
+ .never_changed = NETIF_F_NEVER_CHANGE,
+ },
+ };
+ u32 __user *sizeaddr;
+ u32 copy_size;
+
+ ethtool_get_features_compat(dev, features);
+
+ sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
+ if (get_user(copy_size, sizeaddr))
+ return -EFAULT;
+
+ if (copy_size > ETHTOOL_DEV_FEATURE_WORDS)
+ copy_size = ETHTOOL_DEV_FEATURE_WORDS;
+
+ if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+ return -EFAULT;
+ useraddr += sizeof(cmd);
+ if (copy_to_user(useraddr, features, copy_size * sizeof(*features)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_sfeatures cmd;
+ struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
+ int ret = 0;
+
+ if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ return -EFAULT;
+ useraddr += sizeof(cmd);
+
+ if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS)
+ return -EINVAL;
+
+ if (copy_from_user(features, useraddr, sizeof(features)))
+ return -EFAULT;
+
+ if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
+ return -EINVAL;
+
+ if (ethtool_set_features_compat(dev, features))
+ ret |= ETHTOOL_F_COMPAT;
+
+ if (features[0].valid & ~dev->hw_features) {
+ features[0].valid &= dev->hw_features;
+ ret |= ETHTOOL_F_UNSUPPORTED;
+ }
+
+ dev->wanted_features &= ~features[0].valid;
+ dev->wanted_features |= features[0].valid & features[0].requested;
+ __netdev_update_features(dev);
+
+ if ((dev->wanted_features ^ dev->features) & features[0].valid)
+ ret |= ETHTOOL_F_WISH;
+
+ return ret;
+}
+
+static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
+ /* NETIF_F_SG */ "tx-scatter-gather",
+ /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4",
+ /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded",
+ /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic",
+ /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6",
+ /* NETIF_F_HIGHDMA */ "highdma",
+ /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist",
+ /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert",
+
+ /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse",
+ /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter",
+ /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
+ /* NETIF_F_GSO */ "tx-generic-segmentation",
+ /* NETIF_F_LLTX */ "tx-lockless",
+ /* NETIF_F_NETNS_LOCAL */ "netns-local",
+ /* NETIF_F_GRO */ "rx-gro",
+ /* NETIF_F_LRO */ "rx-lro",
+
+ /* NETIF_F_TSO */ "tx-tcp-segmentation",
+ /* NETIF_F_UFO */ "tx-udp-fragmentation",
+ /* NETIF_F_GSO_ROBUST */ "tx-gso-robust",
+ /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation",
+ /* NETIF_F_TSO6 */ "tx-tcp6-segmentation",
+ /* NETIF_F_FSO */ "tx-fcoe-segmentation",
+ "",
+ "",
+
+ /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc",
+ /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp",
+ /* NETIF_F_FCOE_MTU */ "fcoe-mtu",
+ /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
+ /* NETIF_F_RXHASH */ "rx-hashing",
+ /* NETIF_F_RXCSUM */ "rx-checksum",
+ /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy",
+ /* NETIF_F_LOOPBACK */ "loopback",
+};
+
+static int __ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (sset == ETH_SS_FEATURES)
+ return ARRAY_SIZE(netdev_features_strings);
+
+ if (ops && ops->get_sset_count && ops->get_strings)
+ return ops->get_sset_count(dev, sset);
+ else
+ return -EOPNOTSUPP;
+}
+
+static void __ethtool_get_strings(struct net_device *dev,
+ u32 stringset, u8 *data)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (stringset == ETH_SS_FEATURES)
+ memcpy(data, netdev_features_strings,
+ sizeof(netdev_features_strings));
+ else
+ /* ops->get_strings is valid because checked earlier */
+ ops->get_strings(dev, stringset, data);
+}
+
+static u32 ethtool_get_feature_mask(u32 eth_cmd)
+{
+ /* feature masks of legacy discrete ethtool ops */
+
+ switch (eth_cmd) {
+ case ETHTOOL_GTXCSUM:
+ case ETHTOOL_STXCSUM:
+ return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
+ case ETHTOOL_GRXCSUM:
+ case ETHTOOL_SRXCSUM:
+ return NETIF_F_RXCSUM;
+ case ETHTOOL_GSG:
+ case ETHTOOL_SSG:
+ return NETIF_F_SG;
+ case ETHTOOL_GTSO:
+ case ETHTOOL_STSO:
+ return NETIF_F_ALL_TSO;
+ case ETHTOOL_GUFO:
+ case ETHTOOL_SUFO:
+ return NETIF_F_UFO;
+ case ETHTOOL_GGSO:
+ case ETHTOOL_SGSO:
+ return NETIF_F_GSO;
+ case ETHTOOL_GGRO:
+ case ETHTOOL_SGRO:
+ return NETIF_F_GRO;
+ default:
+ BUG();
+ }
+}
+
+static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (!ops)
+ return NULL;
+
+ switch (ethcmd) {
+ case ETHTOOL_GTXCSUM:
+ return ops->get_tx_csum;
+ case ETHTOOL_GRXCSUM:
+ return ops->get_rx_csum;
+ case ETHTOOL_SSG:
+ return ops->get_sg;
+ case ETHTOOL_STSO:
+ return ops->get_tso;
+ case ETHTOOL_SUFO:
+ return ops->get_ufo;
+ default:
+ return NULL;
+ }
+}
+
+static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
+{
+ return !!(dev->features & NETIF_F_ALL_CSUM);
+}
+
+static int ethtool_get_one_feature(struct net_device *dev,
+ char __user *useraddr, u32 ethcmd)
+{
+ u32 mask = ethtool_get_feature_mask(ethcmd);
+ struct ethtool_value edata = {
+ .cmd = ethcmd,
+ .data = !!(dev->features & mask),
+ };
+
+ /* compatibility with discrete get_ ops */
+ if (!(dev->hw_features & mask)) {
+ u32 (*actor)(struct net_device *);
+
+ actor = __ethtool_get_one_feature_actor(dev, ethcmd);
+
+ /* bug compatibility with old get_rx_csum */
+ if (ethcmd == ETHTOOL_GRXCSUM && !actor)
+ actor = __ethtool_get_rx_csum_oldbug;
+
+ if (actor)
+ edata.data = actor(dev);
+ }
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
+static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
+static int __ethtool_set_sg(struct net_device *dev, u32 data);
+static int __ethtool_set_tso(struct net_device *dev, u32 data);
+static int __ethtool_set_ufo(struct net_device *dev, u32 data);
+
+static int ethtool_set_one_feature(struct net_device *dev,
+ void __user *useraddr, u32 ethcmd)
+{
+ struct ethtool_value edata;
+ u32 mask;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ mask = ethtool_get_feature_mask(ethcmd);
+ mask &= dev->hw_features;
+ if (mask) {
+ if (edata.data)
+ dev->wanted_features |= mask;
+ else
+ dev->wanted_features &= ~mask;
+
+ __netdev_update_features(dev);
+ return 0;
+ }
+
+ /* Driver is not converted to ndo_fix_features or does not
+ * support changing this offload. In the latter case it won't
+ * have corresponding ethtool_ops field set.
+ *
+ * Following part is to be removed after all drivers advertise
+ * their changeable features in netdev->hw_features and stop
+ * using discrete offload setting ops.
+ */
+
+ switch (ethcmd) {
+ case ETHTOOL_STXCSUM:
+ return __ethtool_set_tx_csum(dev, edata.data);
+ case ETHTOOL_SRXCSUM:
+ return __ethtool_set_rx_csum(dev, edata.data);
+ case ETHTOOL_SSG:
+ return __ethtool_set_sg(dev, edata.data);
+ case ETHTOOL_STSO:
+ return __ethtool_set_tso(dev, edata.data);
+ case ETHTOOL_SUFO:
+ return __ethtool_set_ufo(dev, edata.data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int __ethtool_set_flags(struct net_device *dev, u32 data)
+{
+ u32 changed;
+
+ if (data & ~flags_dup_features)
+ return -EINVAL;
+
+ /* legacy set_flags() op */
+ if (dev->ethtool_ops->set_flags) {
+ if (unlikely(dev->hw_features & flags_dup_features))
+ netdev_warn(dev,
+ "driver BUG: mixed hw_features and set_flags()\n");
+ return dev->ethtool_ops->set_flags(dev, data);
+ }
+
+ /* allow changing only bits set in hw_features */
+ changed = (data ^ dev->features) & flags_dup_features;
+ if (changed & ~dev->hw_features)
+ return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
+
+ dev->wanted_features =
+ (dev->wanted_features & ~changed) | (data & dev->hw_features);
+
+ __netdev_update_features(dev);
+
+ return 0;
+}
+
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
@@ -251,14 +638,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
void __user *useraddr)
{
struct ethtool_sset_info info;
- const struct ethtool_ops *ops = dev->ethtool_ops;
u64 sset_mask;
int i, idx = 0, n_bits = 0, ret, rc;
u32 *info_buf = NULL;
- if (!ops->get_sset_count)
- return -EOPNOTSUPP;
-
if (copy_from_user(&info, useraddr, sizeof(info)))
return -EFAULT;
@@ -285,7 +668,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
if (!(sset_mask & (1ULL << i)))
continue;
- rc = ops->get_sset_count(dev, i);
+ rc = __ethtool_get_sset_count(dev, i);
if (rc >= 0) {
info.sset_mask |= (1ULL << i);
info_buf[idx++] = rc;
@@ -527,6 +910,9 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
int ret;
+ if (!ops->set_rx_ntuple)
+ return -EOPNOTSUPP;
+
if (!(dev->features & NETIF_F_NTUPLE))
return -EINVAL;
@@ -817,7 +1203,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
if (regs.len > reglen)
regs.len = reglen;
- regbuf = vmalloc(reglen);
+ regbuf = vzalloc(reglen);
if (!regbuf)
return -ENOMEM;
@@ -891,6 +1277,20 @@ static int ethtool_nway_reset(struct net_device *dev)
return dev->ethtool_ops->nway_reset(dev);
}
+static int ethtool_get_link(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
+
+ if (!dev->ethtool_ops->get_link)
+ return -EOPNOTSUPP;
+
+ edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
@@ -1046,6 +1446,35 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
return dev->ethtool_ops->set_ringparam(dev, &ringparam);
}
+static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
+ void __user *useraddr)
+{
+ struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+
+ if (!dev->ethtool_ops->get_channels)
+ return -EOPNOTSUPP;
+
+ dev->ethtool_ops->get_channels(dev, &channels);
+
+ if (copy_to_user(useraddr, &channels, sizeof(channels)))
+ return -EFAULT;
+ return 0;
+}
+
+static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
+ void __user *useraddr)
+{
+ struct ethtool_channels channels;
+
+ if (!dev->ethtool_ops->set_channels)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&channels, useraddr, sizeof(channels)))
+ return -EFAULT;
+
+ return dev->ethtool_ops->set_channels(dev, &channels);
+}
+
static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
{
struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM };
@@ -1077,6 +1506,12 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
{
int err;
+ if (!dev->ethtool_ops->set_sg)
+ return -EOPNOTSUPP;
+
+ if (data && !(dev->features & NETIF_F_ALL_CSUM))
+ return -EINVAL;
+
if (!data && dev->ethtool_ops->set_tso) {
err = dev->ethtool_ops->set_tso(dev, 0);
if (err)
@@ -1091,143 +1526,55 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
return dev->ethtool_ops->set_sg(dev, data);
}
-static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
{
- struct ethtool_value edata;
int err;
if (!dev->ethtool_ops->set_tx_csum)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (!edata.data && dev->ethtool_ops->set_sg) {
+ if (!data && dev->ethtool_ops->set_sg) {
err = __ethtool_set_sg(dev, 0);
if (err)
return err;
}
- return dev->ethtool_ops->set_tx_csum(dev, edata.data);
+ return dev->ethtool_ops->set_tx_csum(dev, data);
}
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
{
- struct ethtool_value edata;
-
if (!dev->ethtool_ops->set_rx_csum)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (!edata.data && dev->ethtool_ops->set_sg)
+ if (!data)
dev->features &= ~NETIF_F_GRO;
- return dev->ethtool_ops->set_rx_csum(dev, edata.data);
-}
-
-static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (!dev->ethtool_ops->set_sg)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (edata.data &&
- !(dev->features & NETIF_F_ALL_CSUM))
- return -EINVAL;
-
- return __ethtool_set_sg(dev, edata.data);
+ return dev->ethtool_ops->set_rx_csum(dev, data);
}
-static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_tso(struct net_device *dev, u32 data)
{
- struct ethtool_value edata;
-
if (!dev->ethtool_ops->set_tso)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (edata.data && !(dev->features & NETIF_F_SG))
+ if (data && !(dev->features & NETIF_F_SG))
return -EINVAL;
- return dev->ethtool_ops->set_tso(dev, edata.data);
+ return dev->ethtool_ops->set_tso(dev, data);
}
-static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_ufo(struct net_device *dev, u32 data)
{
- struct ethtool_value edata;
-
if (!dev->ethtool_ops->set_ufo)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
- if (edata.data && !(dev->features & NETIF_F_SG))
+ if (data && !(dev->features & NETIF_F_SG))
return -EINVAL;
- if (edata.data && !(dev->features & NETIF_F_HW_CSUM))
+ if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
+ (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+ == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
return -EINVAL;
- return dev->ethtool_ops->set_ufo(dev, edata.data);
-}
-
-static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GGSO };
-
- edata.data = dev->features & NETIF_F_GSO;
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
- if (edata.data)
- dev->features |= NETIF_F_GSO;
- else
- dev->features &= ~NETIF_F_GSO;
- return 0;
-}
-
-static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GGRO };
-
- edata.data = dev->features & NETIF_F_GRO;
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (edata.data) {
- u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
- dev->ethtool_ops->get_rx_csum(dev) :
- ethtool_op_get_rx_csum(dev);
-
- if (!rxcsum)
- return -EINVAL;
- dev->features |= NETIF_F_GRO;
- } else
- dev->features &= ~NETIF_F_GRO;
-
- return 0;
+ return dev->ethtool_ops->set_ufo(dev, data);
}
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
@@ -1271,17 +1618,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gstrings gstrings;
- const struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
- if (!ops->get_strings || !ops->get_sset_count)
- return -EOPNOTSUPP;
-
if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
return -EFAULT;
- ret = ops->get_sset_count(dev, gstrings.string_set);
+ ret = __ethtool_get_sset_count(dev, gstrings.string_set);
if (ret < 0)
return ret;
@@ -1291,7 +1634,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
if (!data)
return -ENOMEM;
- ops->get_strings(dev, gstrings.string_set, data);
+ __ethtool_get_strings(dev, gstrings.string_set, data);
ret = -EFAULT;
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
@@ -1301,7 +1644,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
goto out;
ret = 0;
- out:
+out:
kfree(data);
return ret;
}
@@ -1309,14 +1652,60 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
{
struct ethtool_value id;
+ static bool busy;
+ int rc;
- if (!dev->ethtool_ops->phys_id)
+ if (!dev->ethtool_ops->set_phys_id)
return -EOPNOTSUPP;
+ if (busy)
+ return -EBUSY;
+
if (copy_from_user(&id, useraddr, sizeof(id)))
return -EFAULT;
- return dev->ethtool_ops->phys_id(dev, id.data);
+ rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE);
+ if (rc < 0)
+ return rc;
+
+ /* Drop the RTNL lock while waiting, but prevent reentry or
+ * removal of the device.
+ */
+ busy = true;
+ dev_hold(dev);
+ rtnl_unlock();
+
+ if (rc == 0) {
+ /* Driver will handle this itself */
+ schedule_timeout_interruptible(
+ id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT);
+ } else {
+ /* Driver expects to be called at twice the frequency in rc */
+ int n = rc * 2, i, interval = HZ / n;
+
+ /* Count down seconds */
+ do {
+ /* Count down iterations per second */
+ i = n;
+ do {
+ rtnl_lock();
+ rc = dev->ethtool_ops->set_phys_id(dev,
+ (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON);
+ rtnl_unlock();
+ if (rc)
+ break;
+ schedule_timeout_interruptible(interval);
+ } while (!signal_pending(current) && --i != 0);
+ } while (!signal_pending(current) &&
+ (id.data == 0 || --id.data != 0));
+ }
+
+ rtnl_lock();
+ dev_put(dev);
+ busy = false;
+
+ (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE);
+ return rc;
}
static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
@@ -1434,6 +1823,87 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
return dev->ethtool_ops->flash_device(dev, &efl);
}
+static int ethtool_set_dump(struct net_device *dev,
+ void __user *useraddr)
+{
+ struct ethtool_dump dump;
+
+ if (!dev->ethtool_ops->set_dump)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&dump, useraddr, sizeof(dump)))
+ return -EFAULT;
+
+ return dev->ethtool_ops->set_dump(dev, &dump);
+}
+
+static int ethtool_get_dump_flag(struct net_device *dev,
+ void __user *useraddr)
+{
+ int ret;
+ struct ethtool_dump dump;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (!dev->ethtool_ops->get_dump_flag)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&dump, useraddr, sizeof(dump)))
+ return -EFAULT;
+
+ ret = ops->get_dump_flag(dev, &dump);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(useraddr, &dump, sizeof(dump)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_get_dump_data(struct net_device *dev,
+ void __user *useraddr)
+{
+ int ret;
+ __u32 len;
+ struct ethtool_dump dump, tmp;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ void *data = NULL;
+
+ if (!dev->ethtool_ops->get_dump_data ||
+ !dev->ethtool_ops->get_dump_flag)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&dump, useraddr, sizeof(dump)))
+ return -EFAULT;
+
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.cmd = ETHTOOL_GET_DUMP_FLAG;
+ ret = ops->get_dump_flag(dev, &tmp);
+ if (ret)
+ return ret;
+
+ len = (tmp.len > dump.len) ? dump.len : tmp.len;
+ if (!len)
+ return -EFAULT;
+
+ data = vzalloc(tmp.len);
+ if (!data)
+ return -ENOMEM;
+ ret = ops->get_dump_data(dev, &dump, data);
+ if (ret)
+ goto out;
+
+ if (copy_to_user(useraddr, &dump, sizeof(dump))) {
+ ret = -EFAULT;
+ goto out;
+ }
+ useraddr += offsetof(struct ethtool_dump, data);
+ if (copy_to_user(useraddr, data, len))
+ ret = -EFAULT;
+out:
+ vfree(data);
+ return ret;
+}
+
/* The main entry point in this file. Called from net/core/dev.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -1442,7 +1912,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
void __user *useraddr = ifr->ifr_data;
u32 ethcmd;
int rc;
- unsigned long old_features;
+ u32 old_features;
if (!dev || !netif_device_present(dev))
return -ENODEV;
@@ -1484,6 +1954,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXCLSRLCNT:
case ETHTOOL_GRXCLSRULE:
case ETHTOOL_GRXCLSRLALL:
+ case ETHTOOL_GFEATURES:
break;
default:
if (!capable(CAP_NET_ADMIN))
@@ -1528,8 +1999,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
rc = ethtool_nway_reset(dev);
break;
case ETHTOOL_GLINK:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- dev->ethtool_ops->get_link);
+ rc = ethtool_get_link(dev, useraddr);
break;
case ETHTOOL_GEEPROM:
rc = ethtool_get_eeprom(dev, useraddr);
@@ -1555,42 +2025,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SPAUSEPARAM:
rc = ethtool_set_pauseparam(dev, useraddr);
break;
- case ETHTOOL_GRXCSUM:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_rx_csum ?
- dev->ethtool_ops->get_rx_csum :
- ethtool_op_get_rx_csum));
- break;
- case ETHTOOL_SRXCSUM:
- rc = ethtool_set_rx_csum(dev, useraddr);
- break;
- case ETHTOOL_GTXCSUM:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_tx_csum ?
- dev->ethtool_ops->get_tx_csum :
- ethtool_op_get_tx_csum));
- break;
- case ETHTOOL_STXCSUM:
- rc = ethtool_set_tx_csum(dev, useraddr);
- break;
- case ETHTOOL_GSG:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_sg ?
- dev->ethtool_ops->get_sg :
- ethtool_op_get_sg));
- break;
- case ETHTOOL_SSG:
- rc = ethtool_set_sg(dev, useraddr);
- break;
- case ETHTOOL_GTSO:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_tso ?
- dev->ethtool_ops->get_tso :
- ethtool_op_get_tso));
- break;
- case ETHTOOL_STSO:
- rc = ethtool_set_tso(dev, useraddr);
- break;
case ETHTOOL_TEST:
rc = ethtool_self_test(dev, useraddr);
break;
@@ -1606,21 +2040,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GPERMADDR:
rc = ethtool_get_perm_addr(dev, useraddr);
break;
- case ETHTOOL_GUFO:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_ufo ?
- dev->ethtool_ops->get_ufo :
- ethtool_op_get_ufo));
- break;
- case ETHTOOL_SUFO:
- rc = ethtool_set_ufo(dev, useraddr);
- break;
- case ETHTOOL_GGSO:
- rc = ethtool_get_gso(dev, useraddr);
- break;
- case ETHTOOL_SGSO:
- rc = ethtool_set_gso(dev, useraddr);
- break;
case ETHTOOL_GFLAGS:
rc = ethtool_get_value(dev, useraddr, ethcmd,
(dev->ethtool_ops->get_flags ?
@@ -1628,8 +2047,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
ethtool_op_get_flags));
break;
case ETHTOOL_SFLAGS:
- rc = ethtool_set_value(dev, useraddr,
- dev->ethtool_ops->set_flags);
+ rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
break;
case ETHTOOL_GPFLAGS:
rc = ethtool_get_value(dev, useraddr, ethcmd,
@@ -1651,12 +2069,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXCLSRLINS:
rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
break;
- case ETHTOOL_GGRO:
- rc = ethtool_get_gro(dev, useraddr);
- break;
- case ETHTOOL_SGRO:
- rc = ethtool_set_gro(dev, useraddr);
- break;
case ETHTOOL_FLASHDEV:
rc = ethtool_flash_device(dev, useraddr);
break;
@@ -1678,6 +2090,45 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXFHINDIR:
rc = ethtool_set_rxfh_indir(dev, useraddr);
break;
+ case ETHTOOL_GFEATURES:
+ rc = ethtool_get_features(dev, useraddr);
+ break;
+ case ETHTOOL_SFEATURES:
+ rc = ethtool_set_features(dev, useraddr);
+ break;
+ case ETHTOOL_GTXCSUM:
+ case ETHTOOL_GRXCSUM:
+ case ETHTOOL_GSG:
+ case ETHTOOL_GTSO:
+ case ETHTOOL_GUFO:
+ case ETHTOOL_GGSO:
+ case ETHTOOL_GGRO:
+ rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
+ break;
+ case ETHTOOL_STXCSUM:
+ case ETHTOOL_SRXCSUM:
+ case ETHTOOL_SSG:
+ case ETHTOOL_STSO:
+ case ETHTOOL_SUFO:
+ case ETHTOOL_SGSO:
+ case ETHTOOL_SGRO:
+ rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
+ break;
+ case ETHTOOL_GCHANNELS:
+ rc = ethtool_get_channels(dev, useraddr);
+ break;
+ case ETHTOOL_SCHANNELS:
+ rc = ethtool_set_channels(dev, useraddr);
+ break;
+ case ETHTOOL_SET_DUMP:
+ rc = ethtool_set_dump(dev, useraddr);
+ break;
+ case ETHTOOL_GET_DUMP_FLAG:
+ rc = ethtool_get_dump_flag(dev, useraddr);
+ break;
+ case ETHTOOL_GET_DUMP_DATA:
+ rc = ethtool_get_dump_data(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 1bc3f253ba6c..3911586e12e4 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -181,14 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
{
int ret = 0;
- if (rule->iifindex && (rule->iifindex != fl->iif) &&
- !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
+ if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
goto out;
- if (rule->oifindex && (rule->oifindex != fl->oif))
+ if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
goto out;
- if ((rule->mark ^ fl->mark) & rule->mark_mask)
+ if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
goto out;
ret = ops->match(rule, fl, flags);
@@ -351,12 +350,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref == rule->target) {
- rule->ctarget = r;
+ RCU_INIT_POINTER(rule->ctarget, r);
break;
}
}
- if (rule->ctarget == NULL)
+ if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
unresolved = 1;
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
@@ -373,6 +372,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
fib_rule_get(rule);
+ if (last)
+ list_add_rcu(&rule->list, &last->list);
+ else
+ list_add_rcu(&rule->list, &ops->rules_list);
+
if (ops->unresolved_rules) {
/*
* There are unresolved goto rules in the list, check if
@@ -381,7 +385,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
list_for_each_entry(r, &ops->rules_list, list) {
if (r->action == FR_ACT_GOTO &&
r->target == rule->pref) {
- BUG_ON(r->ctarget != NULL);
+ BUG_ON(rtnl_dereference(r->ctarget) != NULL);
rcu_assign_pointer(r->ctarget, rule);
if (--ops->unresolved_rules == 0)
break;
@@ -395,11 +399,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (unresolved)
ops->unresolved_rules++;
- if (last)
- list_add_rcu(&rule->list, &last->list);
- else
- list_add_rcu(&rule->list, &ops->rules_list);
-
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -487,7 +486,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
*/
if (ops->nr_goto_rules > 0) {
list_for_each_entry(tmp, &ops->rules_list, list) {
- if (tmp->ctarget == rule) {
+ if (rtnl_dereference(tmp->ctarget) == rule) {
rcu_assign_pointer(tmp->ctarget, NULL);
ops->unresolved_rules++;
}
@@ -545,7 +544,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
- if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+ if (rule->action == FR_ACT_GOTO &&
+ rcu_dereference_raw(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
if (rule->iifname[0]) {
@@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
int idx = 0;
struct fib_rule *rule;
- list_for_each_entry(rule, &ops->rules_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(rule, &ops->rules_list, list) {
if (idx < cb->args[1])
goto skip;
diff --git a/net/core/filter.c b/net/core/filter.c
index 7adf50352918..0eb8c4466eaa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -37,9 +37,10 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
+#include <linux/reciprocal_div.h>
/* No hurry in this branch */
-static void *__load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
{
u8 *ptr = NULL;
@@ -48,21 +49,17 @@ static void *__load_pointer(struct sk_buff *skb, int k)
else if (k >= SKF_LL_OFF)
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
- if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
+ if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
return NULL;
}
-static inline void *load_pointer(struct sk_buff *skb, int k,
+static inline void *load_pointer(const struct sk_buff *skb, int k,
unsigned int size, void *buffer)
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
- else {
- if (k >= SKF_AD_OFF)
- return NULL;
- return __load_pointer(skb, k);
- }
+ return __load_pointer(skb, k, size);
}
/**
@@ -86,14 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
if (err)
return err;
- rcu_read_lock_bh();
- filter = rcu_dereference_bh(sk->sk_filter);
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns,
- filter->len);
+ unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
+
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return err;
}
@@ -102,49 +99,53 @@ EXPORT_SYMBOL(sk_filter);
/**
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
- * @filter: filter to apply
- * @flen: length of filter
+ * @fentry: filter to apply
*
* Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
*/
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(const struct sk_buff *skb,
+ const struct sock_filter *fentry)
{
- struct sock_filter *fentry; /* We walk down these */
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
u32 tmp;
int k;
- int pc;
/*
* Process array of filter instructions.
*/
- for (pc = 0; pc < flen; pc++) {
- fentry = &filter[pc];
+ for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define K (fentry->k)
+#else
+ const u32 K = fentry->k;
+#endif
switch (fentry->code) {
case BPF_S_ALU_ADD_X:
A += X;
continue;
case BPF_S_ALU_ADD_K:
- A += fentry->k;
+ A += K;
continue;
case BPF_S_ALU_SUB_X:
A -= X;
continue;
case BPF_S_ALU_SUB_K:
- A -= fentry->k;
+ A -= K;
continue;
case BPF_S_ALU_MUL_X:
A *= X;
continue;
case BPF_S_ALU_MUL_K:
- A *= fentry->k;
+ A *= K;
continue;
case BPF_S_ALU_DIV_X:
if (X == 0)
@@ -152,89 +153,89 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= fentry->k;
+ A = reciprocal_divide(A, K);
continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
case BPF_S_ALU_AND_K:
- A &= fentry->k;
+ A &= K;
continue;
case BPF_S_ALU_OR_X:
A |= X;
continue;
case BPF_S_ALU_OR_K:
- A |= fentry->k;
+ A |= K;
continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
case BPF_S_ALU_LSH_K:
- A <<= fentry->k;
+ A <<= K;
continue;
case BPF_S_ALU_RSH_X:
A >>= X;
continue;
case BPF_S_ALU_RSH_K:
- A >>= fentry->k;
+ A >>= K;
continue;
case BPF_S_ALU_NEG:
A = -A;
continue;
case BPF_S_JMP_JA:
- pc += fentry->k;
+ fentry += K;
continue;
case BPF_S_JMP_JGT_K:
- pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A > K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_K:
- pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A >= K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_K:
- pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A == K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_K:
- pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+ fentry += (A & K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGT_X:
- pc += (A > X) ? fentry->jt : fentry->jf;
+ fentry += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_X:
- pc += (A >= X) ? fentry->jt : fentry->jf;
+ fentry += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_X:
- pc += (A == X) ? fentry->jt : fentry->jf;
+ fentry += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_X:
- pc += (A & X) ? fentry->jt : fentry->jf;
+ fentry += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_LD_W_ABS:
- k = fentry->k;
+ k = K;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
A = get_unaligned_be32(ptr);
continue;
}
- break;
+ return 0;
case BPF_S_LD_H_ABS:
- k = fentry->k;
+ k = K;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
A = get_unaligned_be16(ptr);
continue;
}
- break;
+ return 0;
case BPF_S_LD_B_ABS:
- k = fentry->k;
+ k = K;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
A = *(u8 *)ptr;
continue;
}
- break;
+ return 0;
case BPF_S_LD_W_LEN:
A = skb->len;
continue;
@@ -242,32 +243,32 @@ load_b:
X = skb->len;
continue;
case BPF_S_LD_W_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_w;
case BPF_S_LD_H_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_h;
case BPF_S_LD_B_IND:
- k = X + fentry->k;
+ k = X + K;
goto load_b;
case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, fentry->k, 1, &tmp);
+ ptr = load_pointer(skb, K, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_S_LD_IMM:
- A = fentry->k;
+ A = K;
continue;
case BPF_S_LDX_IMM:
- X = fentry->k;
+ X = K;
continue;
case BPF_S_LD_MEM:
- A = mem[fentry->k];
+ A = mem[K];
continue;
case BPF_S_LDX_MEM:
- X = mem[fentry->k];
+ X = mem[K];
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -276,48 +277,44 @@ load_b:
A = X;
continue;
case BPF_S_RET_K:
- return fentry->k;
+ return K;
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- mem[fentry->k] = A;
+ mem[K] = A;
continue;
case BPF_S_STX:
- mem[fentry->k] = X;
+ mem[K] = X;
continue;
- default:
- WARN_ON(1);
- return 0;
- }
-
- /*
- * Handle ancillary data, which are impossible
- * (or very difficult) to get parsing packet contents.
- */
- switch (k-SKF_AD_OFF) {
- case SKF_AD_PROTOCOL:
+ case BPF_S_ANC_PROTOCOL:
A = ntohs(skb->protocol);
continue;
- case SKF_AD_PKTTYPE:
+ case BPF_S_ANC_PKTTYPE:
A = skb->pkt_type;
continue;
- case SKF_AD_IFINDEX:
+ case BPF_S_ANC_IFINDEX:
if (!skb->dev)
return 0;
A = skb->dev->ifindex;
continue;
- case SKF_AD_MARK:
+ case BPF_S_ANC_MARK:
A = skb->mark;
continue;
- case SKF_AD_QUEUE:
+ case BPF_S_ANC_QUEUE:
A = skb->queue_mapping;
continue;
- case SKF_AD_HATYPE:
+ case BPF_S_ANC_HATYPE:
if (!skb->dev)
return 0;
A = skb->dev->type;
continue;
- case SKF_AD_NLATTR: {
+ case BPF_S_ANC_RXHASH:
+ A = skb->rxhash;
+ continue;
+ case BPF_S_ANC_CPU:
+ A = raw_smp_processor_id();
+ continue;
+ case BPF_S_ANC_NLATTR: {
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -333,7 +330,7 @@ load_b:
A = 0;
continue;
}
- case SKF_AD_NLATTR_NEST: {
+ case BPF_S_ANC_NLATTR_NEST: {
struct nlattr *nla;
if (skb_is_nonlinear(skb))
@@ -353,6 +350,7 @@ load_b:
continue;
}
default:
+ WARN_ON(1);
return 0;
}
}
@@ -361,6 +359,66 @@ load_b:
}
EXPORT_SYMBOL(sk_run_filter);
+/*
+ * Security :
+ * A BPF program is able to use 16 cells of memory to store intermediate
+ * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
+ * As we dont want to clear mem[] array for each packet going through
+ * sk_run_filter(), we check that filter loaded by user never try to read
+ * a cell if not previously written, and we check all branches to be sure
+ * a malicious user doesn't try to abuse us.
+ */
+static int check_load_and_stores(struct sock_filter *filter, int flen)
+{
+ u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+ int pc, ret = 0;
+
+ BUILD_BUG_ON(BPF_MEMWORDS > 16);
+ masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+ if (!masks)
+ return -ENOMEM;
+ memset(masks, 0xff, flen * sizeof(*masks));
+
+ for (pc = 0; pc < flen; pc++) {
+ memvalid &= masks[pc];
+
+ switch (filter[pc].code) {
+ case BPF_S_ST:
+ case BPF_S_STX:
+ memvalid |= (1 << filter[pc].k);
+ break;
+ case BPF_S_LD_MEM:
+ case BPF_S_LDX_MEM:
+ if (!(memvalid & (1 << filter[pc].k))) {
+ ret = -EINVAL;
+ goto error;
+ }
+ break;
+ case BPF_S_JMP_JA:
+ /* a jump must set masks on target */
+ masks[pc + 1 + filter[pc].k] &= memvalid;
+ memvalid = ~0;
+ break;
+ case BPF_S_JMP_JEQ_K:
+ case BPF_S_JMP_JEQ_X:
+ case BPF_S_JMP_JGE_K:
+ case BPF_S_JMP_JGE_X:
+ case BPF_S_JMP_JGT_K:
+ case BPF_S_JMP_JGT_X:
+ case BPF_S_JMP_JSET_X:
+ case BPF_S_JMP_JSET_K:
+ /* a jump must set masks on targets */
+ masks[pc + 1 + filter[pc].jt] &= memvalid;
+ masks[pc + 1 + filter[pc].jf] &= memvalid;
+ memvalid = ~0;
+ break;
+ }
+ }
+error:
+ kfree(masks);
+ return ret;
+}
+
/**
* sk_chk_filter - verify socket filter code
* @filter: filter to verify
@@ -377,7 +435,57 @@ EXPORT_SYMBOL(sk_run_filter);
*/
int sk_chk_filter(struct sock_filter *filter, int flen)
{
- struct sock_filter *ftest;
+ /*
+ * Valid instructions are initialized to non-0.
+ * Invalid instructions are initialized to 0.
+ */
+ static const u8 codes[] = {
+ [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
+ [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
+ [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
+ [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
+ [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
+ [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
+ [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
+ [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
+ [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
+ [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
+ [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
+ [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
+ [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
+ [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
+ [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
+ [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
+ [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
+ [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
+ [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
+ [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
+ [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
+ [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
+ [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
+ [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
+ [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
+ [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
+ [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
+ [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
+ [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
+ [BPF_RET|BPF_K] = BPF_S_RET_K,
+ [BPF_RET|BPF_A] = BPF_S_RET_A,
+ [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
+ [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
+ [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
+ [BPF_ST] = BPF_S_ST,
+ [BPF_STX] = BPF_S_STX,
+ [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
+ [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
+ [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
+ [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
+ [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
+ [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
+ [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
+ [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
+ [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
+ };
int pc;
if (flen == 0 || flen > BPF_MAXINSNS)
@@ -385,136 +493,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
/* check the filter code now */
for (pc = 0; pc < flen; pc++) {
- ftest = &filter[pc];
-
- /* Only allow valid instructions */
- switch (ftest->code) {
- case BPF_ALU|BPF_ADD|BPF_K:
- ftest->code = BPF_S_ALU_ADD_K;
- break;
- case BPF_ALU|BPF_ADD|BPF_X:
- ftest->code = BPF_S_ALU_ADD_X;
- break;
- case BPF_ALU|BPF_SUB|BPF_K:
- ftest->code = BPF_S_ALU_SUB_K;
- break;
- case BPF_ALU|BPF_SUB|BPF_X:
- ftest->code = BPF_S_ALU_SUB_X;
- break;
- case BPF_ALU|BPF_MUL|BPF_K:
- ftest->code = BPF_S_ALU_MUL_K;
- break;
- case BPF_ALU|BPF_MUL|BPF_X:
- ftest->code = BPF_S_ALU_MUL_X;
- break;
- case BPF_ALU|BPF_DIV|BPF_X:
- ftest->code = BPF_S_ALU_DIV_X;
- break;
- case BPF_ALU|BPF_AND|BPF_K:
- ftest->code = BPF_S_ALU_AND_K;
- break;
- case BPF_ALU|BPF_AND|BPF_X:
- ftest->code = BPF_S_ALU_AND_X;
- break;
- case BPF_ALU|BPF_OR|BPF_K:
- ftest->code = BPF_S_ALU_OR_K;
- break;
- case BPF_ALU|BPF_OR|BPF_X:
- ftest->code = BPF_S_ALU_OR_X;
- break;
- case BPF_ALU|BPF_LSH|BPF_K:
- ftest->code = BPF_S_ALU_LSH_K;
- break;
- case BPF_ALU|BPF_LSH|BPF_X:
- ftest->code = BPF_S_ALU_LSH_X;
- break;
- case BPF_ALU|BPF_RSH|BPF_K:
- ftest->code = BPF_S_ALU_RSH_K;
- break;
- case BPF_ALU|BPF_RSH|BPF_X:
- ftest->code = BPF_S_ALU_RSH_X;
- break;
- case BPF_ALU|BPF_NEG:
- ftest->code = BPF_S_ALU_NEG;
- break;
- case BPF_LD|BPF_W|BPF_ABS:
- ftest->code = BPF_S_LD_W_ABS;
- break;
- case BPF_LD|BPF_H|BPF_ABS:
- ftest->code = BPF_S_LD_H_ABS;
- break;
- case BPF_LD|BPF_B|BPF_ABS:
- ftest->code = BPF_S_LD_B_ABS;
- break;
- case BPF_LD|BPF_W|BPF_LEN:
- ftest->code = BPF_S_LD_W_LEN;
- break;
- case BPF_LD|BPF_W|BPF_IND:
- ftest->code = BPF_S_LD_W_IND;
- break;
- case BPF_LD|BPF_H|BPF_IND:
- ftest->code = BPF_S_LD_H_IND;
- break;
- case BPF_LD|BPF_B|BPF_IND:
- ftest->code = BPF_S_LD_B_IND;
- break;
- case BPF_LD|BPF_IMM:
- ftest->code = BPF_S_LD_IMM;
- break;
- case BPF_LDX|BPF_W|BPF_LEN:
- ftest->code = BPF_S_LDX_W_LEN;
- break;
- case BPF_LDX|BPF_B|BPF_MSH:
- ftest->code = BPF_S_LDX_B_MSH;
- break;
- case BPF_LDX|BPF_IMM:
- ftest->code = BPF_S_LDX_IMM;
- break;
- case BPF_MISC|BPF_TAX:
- ftest->code = BPF_S_MISC_TAX;
- break;
- case BPF_MISC|BPF_TXA:
- ftest->code = BPF_S_MISC_TXA;
- break;
- case BPF_RET|BPF_K:
- ftest->code = BPF_S_RET_K;
- break;
- case BPF_RET|BPF_A:
- ftest->code = BPF_S_RET_A;
- break;
+ struct sock_filter *ftest = &filter[pc];
+ u16 code = ftest->code;
+ if (code >= ARRAY_SIZE(codes))
+ return -EINVAL;
+ code = codes[code];
+ if (!code)
+ return -EINVAL;
/* Some instructions need special checks */
-
+ switch (code) {
+ case BPF_S_ALU_DIV_K:
/* check for division by zero */
- case BPF_ALU|BPF_DIV|BPF_K:
if (ftest->k == 0)
return -EINVAL;
- ftest->code = BPF_S_ALU_DIV_K;
+ ftest->k = reciprocal_value(ftest->k);
break;
-
- /* check for invalid memory addresses */
- case BPF_LD|BPF_MEM:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_LD_MEM;
- break;
- case BPF_LDX|BPF_MEM:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_LDX_MEM;
- break;
- case BPF_ST:
- if (ftest->k >= BPF_MEMWORDS)
- return -EINVAL;
- ftest->code = BPF_S_ST;
- break;
- case BPF_STX:
+ case BPF_S_LD_MEM:
+ case BPF_S_LDX_MEM:
+ case BPF_S_ST:
+ case BPF_S_STX:
+ /* check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
- ftest->code = BPF_S_STX;
break;
-
- case BPF_JMP|BPF_JA:
+ case BPF_S_JMP_JA:
/*
* Note, the large ftest->k might cause loops.
* Compare this with conditional jumps below,
@@ -522,40 +525,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
- ftest->code = BPF_S_JMP_JA;
- break;
-
- case BPF_JMP|BPF_JEQ|BPF_K:
- ftest->code = BPF_S_JMP_JEQ_K;
- break;
- case BPF_JMP|BPF_JEQ|BPF_X:
- ftest->code = BPF_S_JMP_JEQ_X;
- break;
- case BPF_JMP|BPF_JGE|BPF_K:
- ftest->code = BPF_S_JMP_JGE_K;
- break;
- case BPF_JMP|BPF_JGE|BPF_X:
- ftest->code = BPF_S_JMP_JGE_X;
- break;
- case BPF_JMP|BPF_JGT|BPF_K:
- ftest->code = BPF_S_JMP_JGT_K;
break;
- case BPF_JMP|BPF_JGT|BPF_X:
- ftest->code = BPF_S_JMP_JGT_X;
- break;
- case BPF_JMP|BPF_JSET|BPF_K:
- ftest->code = BPF_S_JMP_JSET_K;
- break;
- case BPF_JMP|BPF_JSET|BPF_X:
- ftest->code = BPF_S_JMP_JSET_X;
- break;
-
- default:
- return -EINVAL;
- }
-
- /* for conditionals both must be safe */
- switch (ftest->code) {
case BPF_S_JMP_JEQ_K:
case BPF_S_JMP_JEQ_X:
case BPF_S_JMP_JGE_K:
@@ -564,42 +534,55 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
case BPF_S_JMP_JGT_X:
case BPF_S_JMP_JSET_X:
case BPF_S_JMP_JSET_K:
+ /* for conditionals both must be safe */
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
+ break;
+ case BPF_S_LD_W_ABS:
+ case BPF_S_LD_H_ABS:
+ case BPF_S_LD_B_ABS:
+#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
+ code = BPF_S_ANC_##CODE; \
+ break
+ switch (ftest->k) {
+ ANCILLARY(PROTOCOL);
+ ANCILLARY(PKTTYPE);
+ ANCILLARY(IFINDEX);
+ ANCILLARY(NLATTR);
+ ANCILLARY(NLATTR_NEST);
+ ANCILLARY(MARK);
+ ANCILLARY(QUEUE);
+ ANCILLARY(HATYPE);
+ ANCILLARY(RXHASH);
+ ANCILLARY(CPU);
+ }
}
+ ftest->code = code;
}
/* last instruction must be a RET code */
switch (filter[flen - 1].code) {
case BPF_S_RET_K:
case BPF_S_RET_A:
- return 0;
- break;
- default:
- return -EINVAL;
- }
+ return check_load_and_stores(filter, flen);
+ }
+ return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
/**
- * sk_filter_rcu_release: Release a socket filter by rcu_head
+ * sk_filter_release_rcu - Release a socket filter by rcu_head
* @rcu: rcu_head that contains the sk_filter to free
*/
-static void sk_filter_rcu_release(struct rcu_head *rcu)
+void sk_filter_release_rcu(struct rcu_head *rcu)
{
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
- sk_filter_release(fp);
-}
-
-static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
-{
- unsigned int size = sk_filter_len(fp);
-
- atomic_sub(size, &sk->sk_omem_alloc);
- call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+ bpf_jit_free(fp);
+ kfree(fp);
}
+EXPORT_SYMBOL(sk_filter_release_rcu);
/**
* sk_attach_filter - attach a socket filter
@@ -631,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
atomic_set(&fp->refcnt, 1);
fp->len = fprog->len;
+ fp->bpf_func = sk_run_filter;
err = sk_chk_filter(fp->insns, fp->len);
if (err) {
@@ -638,12 +622,14 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
return err;
}
+ bpf_jit_compile(fp);
+
old_fp = rcu_dereference_protected(sk->sk_filter,
sock_owned_by_user(sk));
rcu_assign_pointer(sk->sk_filter, fp);
if (old_fp)
- sk_filter_delayed_uncharge(sk, old_fp);
+ sk_filter_uncharge(sk, old_fp);
return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -657,7 +643,7 @@ int sk_detach_filter(struct sock *sk)
sock_owned_by_user(sk));
if (filter) {
rcu_assign_pointer(sk->sk_filter, NULL);
- sk_filter_delayed_uncharge(sk, filter);
+ sk_filter_uncharge(sk, filter);
ret = 0;
}
return ret;
diff --git a/net/core/flow.c b/net/core/flow.c
index 127c8a7ffd61..990703b8863b 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -172,9 +172,9 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
static u32 flow_hash_code(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
- struct flowi *key)
+ const struct flowi *key)
{
- u32 *k = (u32 *) key;
+ const u32 *k = (const u32 *) key;
return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
& (flow_cache_hash_size(fc) - 1);
@@ -186,17 +186,17 @@ typedef unsigned long flow_compare_t;
* important assumptions that we can here, such as alignment and
* constant size.
*/
-static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+static int flow_key_compare(const struct flowi *key1, const struct flowi *key2)
{
- flow_compare_t *k1, *k1_lim, *k2;
+ const flow_compare_t *k1, *k1_lim, *k2;
const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t));
- k1 = (flow_compare_t *) key1;
+ k1 = (const flow_compare_t *) key1;
k1_lim = k1 + n_elem;
- k2 = (flow_compare_t *) key2;
+ k2 = (const flow_compare_t *) key2;
do {
if (*k1++ != *k2++)
@@ -207,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
}
struct flow_cache_object *
-flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver, void *ctx)
{
struct flow_cache *fc = &flow_cache_global;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 7c2373321b74..43b03dd71e85 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -249,13 +249,6 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
}
EXPORT_SYMBOL(gen_new_estimator);
-static void __gen_kill_estimator(struct rcu_head *head)
-{
- struct gen_estimator *e = container_of(head,
- struct gen_estimator, e_rcu);
- kfree(e);
-}
-
/**
* gen_kill_estimator - remove a rate estimator
* @bstats: basic statistics
@@ -279,7 +272,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
write_unlock(&est_lock);
list_del_rcu(&e->list);
- call_rcu(&e->e_rcu, __gen_kill_estimator);
+ kfree_rcu(e, e_rcu);
}
spin_unlock_bh(&est_tree_lock);
}
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 72aceb1fe4fa..c40f27e7d208 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -35,10 +35,9 @@
* in any case.
*/
-long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
{
- int size, ct;
- long err;
+ int size, ct, err;
if (m->msg_namelen) {
if (mode == VERIFY_READ) {
@@ -62,14 +61,13 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
err = 0;
for (ct = 0; ct < m->msg_iovlen; ct++) {
- err += iov[ct].iov_len;
- /*
- * Goal is not to verify user data, but to prevent returning
- * negative value, which is interpreted as errno.
- * Overflow is still possible, but it is harmless.
- */
- if (err < 0)
- return -EMSGSIZE;
+ size_t len = iov[ct].iov_len;
+
+ if (len > INT_MAX - err) {
+ len = INT_MAX - err;
+ iov[ct].iov_len = len;
+ }
+ err += len;
}
return err;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 01a1101b5936..a7b342131869 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -129,7 +129,7 @@ static void linkwatch_schedule_work(int urgent)
if (!cancel_delayed_work(&linkwatch_work))
return;
- /* Otherwise we reschedule it again for immediate exection. */
+ /* Otherwise we reschedule it again for immediate execution. */
schedule_delayed_work(&linkwatch_work, 0);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8cc8f9a79db9..799f06e03a22 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -41,7 +41,6 @@
#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
-#define NEIGH_PRINTK0 NEIGH_PRINTK
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
@@ -317,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
{
size_t size = entries * sizeof(struct neighbour *);
struct neigh_hash_table *ret;
- struct neighbour **buckets;
+ struct neighbour __rcu **buckets;
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
@@ -325,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
if (size <= PAGE_SIZE)
buckets = kzalloc(size, GFP_ATOMIC);
else
- buckets = (struct neighbour **)
+ buckets = (struct neighbour __rcu **)
__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
get_order(size));
if (!buckets) {
kfree(ret);
return NULL;
}
- rcu_assign_pointer(ret->hash_buckets, buckets);
+ ret->hash_buckets = buckets;
ret->hash_mask = entries - 1;
get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
return ret;
@@ -344,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table,
rcu);
size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
- struct neighbour **buckets = nht->hash_buckets;
+ struct neighbour __rcu **buckets = nht->hash_buckets;
if (size <= PAGE_SIZE)
kfree(buckets);
@@ -1541,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot create neighbour proc dir entry");
#endif
- tbl->nht = neigh_hash_alloc(8);
+ RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1603,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
}
write_unlock(&neigh_tbl_lock);
- call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+ call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
+ neigh_hash_free_rcu);
tbl->nht = NULL;
kfree(tbl->phash_buckets);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b143173e3eb2..11b98bc2aa8f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -28,6 +28,7 @@
static const char fmt_hex[] = "%#x\n";
static const char fmt_long_hex[] = "%#lx\n";
static const char fmt_dec[] = "%d\n";
+static const char fmt_udec[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
@@ -99,7 +100,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec);
NETDEVICE_SHOW(addr_len, fmt_dec);
NETDEVICE_SHOW(iflink, fmt_dec);
NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(features, fmt_long_hex);
+NETDEVICE_SHOW(features, fmt_hex);
NETDEVICE_SHOW(type, fmt_dec);
NETDEVICE_SHOW(link_mode, fmt_dec);
@@ -145,13 +146,10 @@ static ssize_t show_speed(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev) &&
- netdev->ethtool_ops &&
- netdev->ethtool_ops->get_settings) {
- struct ethtool_cmd cmd = { ETHTOOL_GSET };
-
- if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
- ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
+ if (netif_running(netdev)) {
+ struct ethtool_cmd cmd;
+ if (!dev_ethtool_get_settings(netdev, &cmd))
+ ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
}
rtnl_unlock();
return ret;
@@ -166,13 +164,11 @@ static ssize_t show_duplex(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev) &&
- netdev->ethtool_ops &&
- netdev->ethtool_ops->get_settings) {
- struct ethtool_cmd cmd = { ETHTOOL_GSET };
-
- if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
- ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
+ if (netif_running(netdev)) {
+ struct ethtool_cmd cmd;
+ if (!dev_ethtool_get_settings(netdev, &cmd))
+ ret = sprintf(buf, "%s\n",
+ cmd.duplex ? "full" : "half");
}
rtnl_unlock();
return ret;
@@ -295,6 +291,20 @@ static ssize_t show_ifalias(struct device *dev,
return ret;
}
+NETDEVICE_SHOW(group, fmt_dec);
+
+static int change_group(struct net_device *net, unsigned long new_group)
+{
+ dev_set_group(net, (int) new_group);
+ return 0;
+}
+
+static ssize_t store_group(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, change_group);
+}
+
static struct device_attribute net_class_attributes[] = {
__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
@@ -316,6 +326,7 @@ static struct device_attribute net_class_attributes[] = {
__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
store_tx_queue_len),
+ __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group),
{}
};
@@ -550,13 +561,6 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
return len;
}
-static void rps_map_release(struct rcu_head *rcu)
-{
- struct rps_map *map = container_of(rcu, struct rps_map, rcu);
-
- kfree(map);
-}
-
static ssize_t store_rps_map(struct netdev_rx_queue *queue,
struct rx_queue_attribute *attribute,
const char *buf, size_t len)
@@ -598,12 +602,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
spin_lock(&rps_map_lock);
- old_map = queue->rps_map;
+ old_map = rcu_dereference_protected(queue->rps_map,
+ lockdep_is_held(&rps_map_lock));
rcu_assign_pointer(queue->rps_map, map);
spin_unlock(&rps_map_lock);
if (old_map)
- call_rcu(&old_map->rcu, rps_map_release);
+ kfree_rcu(old_map, rcu);
free_cpumask_var(mask);
return len;
@@ -677,7 +682,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
table = NULL;
spin_lock(&rps_dev_flow_lock);
- old_table = queue->rps_flow_table;
+ old_table = rcu_dereference_protected(queue->rps_flow_table,
+ lockdep_is_held(&rps_dev_flow_lock));
rcu_assign_pointer(queue->rps_flow_table, table);
spin_unlock(&rps_dev_flow_lock);
@@ -704,17 +710,24 @@ static struct attribute *rx_queue_default_attrs[] = {
static void rx_queue_release(struct kobject *kobj)
{
struct netdev_rx_queue *queue = to_rx_queue(kobj);
- struct netdev_rx_queue *first = queue->first;
+ struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
- if (queue->rps_map)
- call_rcu(&queue->rps_map->rcu, rps_map_release);
- if (queue->rps_flow_table)
- call_rcu(&queue->rps_flow_table->rcu,
- rps_dev_flow_table_release);
+ map = rcu_dereference_raw(queue->rps_map);
+ if (map) {
+ RCU_INIT_POINTER(queue->rps_map, NULL);
+ kfree_rcu(map, rcu);
+ }
+
+ flow_table = rcu_dereference_raw(queue->rps_flow_table);
+ if (flow_table) {
+ RCU_INIT_POINTER(queue->rps_flow_table, NULL);
+ call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+ }
- if (atomic_dec_and_test(&first->count))
- kfree(first);
+ memset(kobj, 0, sizeof(*kobj));
+ dev_put(queue->dev);
}
static struct kobj_type rx_queue_ktype = {
@@ -726,7 +739,6 @@ static struct kobj_type rx_queue_ktype = {
static int rx_queue_add_kobject(struct net_device *net, int index)
{
struct netdev_rx_queue *queue = net->_rx + index;
- struct netdev_rx_queue *first = queue->first;
struct kobject *kobj = &queue->kobj;
int error = 0;
@@ -739,14 +751,16 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
}
kobject_uevent(kobj, KOBJ_ADD);
- atomic_inc(&first->count);
+ dev_hold(queue->dev);
return error;
}
+#endif /* CONFIG_RPS */
int
net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
+#ifdef CONFIG_RPS
int i;
int error = 0;
@@ -762,23 +776,408 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
kobject_put(&net->_rx[i].kobj);
return error;
+#else
+ return 0;
+#endif
+}
+
+#ifdef CONFIG_XPS
+/*
+ * netdev_queue sysfs structures and functions.
+ */
+struct netdev_queue_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr, char *buf);
+ ssize_t (*store)(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_netdev_queue_attr(_attr) container_of(_attr, \
+ struct netdev_queue_attribute, attr)
+
+#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
+
+static ssize_t netdev_queue_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t netdev_queue_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+
+ if (!attribute->store)
+ return -EIO;
+
+ return attribute->store(queue, attribute, buf, count);
+}
+
+static const struct sysfs_ops netdev_queue_sysfs_ops = {
+ .show = netdev_queue_attr_show,
+ .store = netdev_queue_attr_store,
+};
+
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+ struct net_device *dev = queue->dev;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++)
+ if (queue == &dev->_tx[i])
+ break;
+
+ BUG_ON(i >= dev->num_tx_queues);
+
+ return i;
+}
+
+
+static ssize_t show_xps_map(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute, char *buf)
+{
+ struct net_device *dev = queue->dev;
+ struct xps_dev_maps *dev_maps;
+ cpumask_var_t mask;
+ unsigned long index;
+ size_t len = 0;
+ int i;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ index = get_netdev_queue_index(queue);
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ for_each_possible_cpu(i) {
+ struct xps_map *map =
+ rcu_dereference(dev_maps->cpu_map[i]);
+ if (map) {
+ int j;
+ for (j = 0; j < map->len; j++) {
+ if (map->queues[j] == index) {
+ cpumask_set_cpu(i, mask);
+ break;
+ }
+ }
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+ if (PAGE_SIZE - len < 3) {
+ free_cpumask_var(mask);
+ return -EINVAL;
+ }
+
+ free_cpumask_var(mask);
+ len += sprintf(buf + len, "\n");
+ return len;
+}
+
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P) \
+ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+static ssize_t store_xps_map(struct netdev_queue *queue,
+ struct netdev_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct net_device *dev = queue->dev;
+ cpumask_var_t mask;
+ int err, i, cpu, pos, map_len, alloc_len, need_set;
+ unsigned long index;
+ struct xps_map *map, *new_map;
+ struct xps_dev_maps *dev_maps, *new_dev_maps;
+ int nonempty = 0;
+ int numa_node = -2;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ index = get_netdev_queue_index(queue);
+
+ err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+ if (err) {
+ free_cpumask_var(mask);
+ return err;
+ }
+
+ new_dev_maps = kzalloc(max_t(unsigned,
+ XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
+ if (!new_dev_maps) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+
+ mutex_lock(&xps_map_mutex);
+
+ dev_maps = xmap_dereference(dev->xps_maps);
+
+ for_each_possible_cpu(cpu) {
+ map = dev_maps ?
+ xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+ new_map = map;
+ if (map) {
+ for (pos = 0; pos < map->len; pos++)
+ if (map->queues[pos] == index)
+ break;
+ map_len = map->len;
+ alloc_len = map->alloc_len;
+ } else
+ pos = map_len = alloc_len = 0;
+
+ need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
+#ifdef CONFIG_NUMA
+ if (need_set) {
+ if (numa_node == -2)
+ numa_node = cpu_to_node(cpu);
+ else if (numa_node != cpu_to_node(cpu))
+ numa_node = -1;
+ }
+#endif
+ if (need_set && pos >= map_len) {
+ /* Need to add queue to this CPU's map */
+ if (map_len >= alloc_len) {
+ alloc_len = alloc_len ?
+ 2 * alloc_len : XPS_MIN_MAP_ALLOC;
+ new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!new_map)
+ goto error;
+ new_map->alloc_len = alloc_len;
+ for (i = 0; i < map_len; i++)
+ new_map->queues[i] = map->queues[i];
+ new_map->len = map_len;
+ }
+ new_map->queues[new_map->len++] = index;
+ } else if (!need_set && pos < map_len) {
+ /* Need to remove queue from this CPU's map */
+ if (map_len > 1)
+ new_map->queues[pos] =
+ new_map->queues[--new_map->len];
+ else
+ new_map = NULL;
+ }
+ RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
+ }
+
+ /* Cleanup old maps */
+ for_each_possible_cpu(cpu) {
+ map = dev_maps ?
+ xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+ if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
+ kfree_rcu(map, rcu);
+ if (new_dev_maps->cpu_map[cpu])
+ nonempty = 1;
+ }
+
+ if (nonempty)
+ rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+ else {
+ kfree(new_dev_maps);
+ rcu_assign_pointer(dev->xps_maps, NULL);
+ }
+
+ if (dev_maps)
+ kfree_rcu(dev_maps, rcu);
+
+ netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
+ NUMA_NO_NODE);
+
+ mutex_unlock(&xps_map_mutex);
+
+ free_cpumask_var(mask);
+ return len;
+
+error:
+ mutex_unlock(&xps_map_mutex);
+
+ if (new_dev_maps)
+ for_each_possible_cpu(i)
+ kfree(rcu_dereference_protected(
+ new_dev_maps->cpu_map[i],
+ 1));
+ kfree(new_dev_maps);
+ free_cpumask_var(mask);
+ return -ENOMEM;
+}
+
+static struct netdev_queue_attribute xps_cpus_attribute =
+ __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+
+static struct attribute *netdev_queue_default_attrs[] = {
+ &xps_cpus_attribute.attr,
+ NULL
+};
+
+static void netdev_queue_release(struct kobject *kobj)
+{
+ struct netdev_queue *queue = to_netdev_queue(kobj);
+ struct net_device *dev = queue->dev;
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ unsigned long index;
+ int i, pos, nonempty = 0;
+
+ index = get_netdev_queue_index(queue);
+
+ mutex_lock(&xps_map_mutex);
+ dev_maps = xmap_dereference(dev->xps_maps);
+
+ if (dev_maps) {
+ for_each_possible_cpu(i) {
+ map = xmap_dereference(dev_maps->cpu_map[i]);
+ if (!map)
+ continue;
+
+ for (pos = 0; pos < map->len; pos++)
+ if (map->queues[pos] == index)
+ break;
+
+ if (pos < map->len) {
+ if (map->len > 1)
+ map->queues[pos] =
+ map->queues[--map->len];
+ else {
+ RCU_INIT_POINTER(dev_maps->cpu_map[i],
+ NULL);
+ kfree_rcu(map, rcu);
+ map = NULL;
+ }
+ }
+ if (map)
+ nonempty = 1;
+ }
+
+ if (!nonempty) {
+ RCU_INIT_POINTER(dev->xps_maps, NULL);
+ kfree_rcu(dev_maps, rcu);
+ }
+ }
+
+ mutex_unlock(&xps_map_mutex);
+
+ memset(kobj, 0, sizeof(*kobj));
+ dev_put(queue->dev);
+}
+
+static struct kobj_type netdev_queue_ktype = {
+ .sysfs_ops = &netdev_queue_sysfs_ops,
+ .release = netdev_queue_release,
+ .default_attrs = netdev_queue_default_attrs,
+};
+
+static int netdev_queue_add_kobject(struct net_device *net, int index)
+{
+ struct netdev_queue *queue = net->_tx + index;
+ struct kobject *kobj = &queue->kobj;
+ int error = 0;
+
+ kobj->kset = net->queues_kset;
+ error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+ "tx-%u", index);
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
+
+ kobject_uevent(kobj, KOBJ_ADD);
+ dev_hold(queue->dev);
+
+ return error;
}
+#endif /* CONFIG_XPS */
-static int rx_queue_register_kobjects(struct net_device *net)
+int
+netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
+#ifdef CONFIG_XPS
+ int i;
+ int error = 0;
+
+ for (i = old_num; i < new_num; i++) {
+ error = netdev_queue_add_kobject(net, i);
+ if (error) {
+ new_num = old_num;
+ break;
+ }
+ }
+
+ while (--i >= new_num)
+ kobject_put(&net->_tx[i].kobj);
+
+ return error;
+#else
+ return 0;
+#endif
+}
+
+static int register_queue_kobjects(struct net_device *net)
+{
+ int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
+
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
net->queues_kset = kset_create_and_add("queues",
NULL, &net->dev.kobj);
if (!net->queues_kset)
return -ENOMEM;
- return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+#endif
+
+#ifdef CONFIG_RPS
+ real_rx = net->real_num_rx_queues;
+#endif
+ real_tx = net->real_num_tx_queues;
+
+ error = net_rx_queue_update_kobjects(net, 0, real_rx);
+ if (error)
+ goto error;
+ rxq = real_rx;
+
+ error = netdev_queue_update_kobjects(net, 0, real_tx);
+ if (error)
+ goto error;
+ txq = real_tx;
+
+ return 0;
+
+error:
+ netdev_queue_update_kobjects(net, txq, 0);
+ net_rx_queue_update_kobjects(net, rxq, 0);
+ return error;
}
-static void rx_queue_remove_kobjects(struct net_device *net)
+static void remove_queue_kobjects(struct net_device *net)
{
- net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
+ int real_rx = 0, real_tx = 0;
+
+#ifdef CONFIG_RPS
+ real_rx = net->real_num_rx_queues;
+#endif
+ real_tx = net->real_num_tx_queues;
+
+ net_rx_queue_update_kobjects(net, real_rx, 0);
+ netdev_queue_update_kobjects(net, real_tx, 0);
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
kset_unregister(net->queues_kset);
+#endif
}
-#endif /* CONFIG_RPS */
static const void *net_current_ns(void)
{
@@ -877,9 +1276,7 @@ void netdev_unregister_kobject(struct net_device * net)
kobject_get(&dev->kobj);
-#ifdef CONFIG_RPS
- rx_queue_remove_kobjects(net);
-#endif
+ remove_queue_kobjects(net);
device_del(dev);
}
@@ -918,13 +1315,11 @@ int netdev_register_kobject(struct net_device *net)
if (error)
return error;
-#ifdef CONFIG_RPS
- error = rx_queue_register_kobjects(net);
+ error = register_queue_kobjects(net);
if (error) {
device_del(dev);
return error;
}
-#endif
return error;
}
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 778e1571548d..bd7751ec1c4d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,8 +4,8 @@
int netdev_kobject_init(void);
int netdev_register_kobject(struct net_device *);
void netdev_unregister_kobject(struct net_device *);
-#ifdef CONFIG_RPS
int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
-#endif
+int netdev_queue_update_kobjects(struct net_device *net,
+ int old_num, int new_num);
#endif
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c988e685433a..2e2dce6583e1 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,14 +27,6 @@ EXPORT_SYMBOL(init_net);
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
-static void net_generic_release(struct rcu_head *rcu)
-{
- struct net_generic *ng;
-
- ng = container_of(rcu, struct net_generic, rcu);
- kfree(ng);
-}
-
static int net_assign_generic(struct net *net, int id, void *data)
{
struct net_generic *ng, *old_ng;
@@ -42,7 +34,9 @@ static int net_assign_generic(struct net *net, int id, void *data)
BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id == 0);
- ng = old_ng = net->gen;
+ old_ng = rcu_dereference_protected(net->gen,
+ lockdep_is_held(&net_mutex));
+ ng = old_ng;
if (old_ng->len >= id)
goto assign;
@@ -66,7 +60,7 @@ static int net_assign_generic(struct net *net, int id, void *data)
memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
rcu_assign_pointer(net->gen, ng);
- call_rcu(&old_ng->rcu, net_generic_release);
+ kfree_rcu(old_ng, rcu);
assign:
ng->ptr[id - 1] = data;
return 0;
@@ -214,11 +208,14 @@ static void net_free(struct net *net)
kmem_cache_free(net_cachep, net);
}
-static struct net *net_create(void)
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
struct net *net;
int rv;
+ if (!(flags & CLONE_NEWNET))
+ return get_net(old_net);
+
net = net_alloc();
if (!net)
return ERR_PTR(-ENOMEM);
@@ -237,13 +234,6 @@ static struct net *net_create(void)
return net;
}
-struct net *copy_net_ns(unsigned long flags, struct net *old_net)
-{
- if (!(flags & CLONE_NEWNET))
- return get_net(old_net);
- return net_create();
-}
-
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4e98ffac3af0..2d7d6d473781 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -35,7 +35,6 @@
#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32
-#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
static struct sk_buff_head skb_pool;
@@ -76,8 +75,7 @@ static void queue_process(struct work_struct *work)
local_irq_save(flags);
__netif_tx_lock(txq, smp_processor_id());
- if (netif_tx_queue_stopped(txq) ||
- netif_tx_queue_frozen(txq) ||
+ if (netif_tx_queue_frozen_or_stopped(txq) ||
ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
__netif_tx_unlock(txq);
@@ -195,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev)
poll_napi(dev);
+ if (dev->priv_flags & IFF_SLAVE) {
+ if (dev->npinfo) {
+ struct net_device *bond_dev = dev->master;
+ struct sk_buff *skb;
+ while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+ skb->dev = bond_dev;
+ skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+ }
+ }
+ }
+
service_arp_queue(dev->npinfo);
zap_completion_queue();
@@ -315,9 +324,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
if (!netif_tx_queue_stopped(txq)) {
- dev->priv_flags |= IFF_IN_NETPOLL;
status = ops->ndo_start_xmit(skb, dev);
- dev->priv_flags &= ~IFF_IN_NETPOLL;
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
}
@@ -532,7 +539,7 @@ int __netpoll_rx(struct sk_buff *skb)
{
int proto, len, ulen;
int hits = 0;
- struct iphdr *iph;
+ const struct iphdr *iph;
struct udphdr *uh;
struct netpoll_info *npinfo = skb->dev->npinfo;
struct netpoll *np, *tmp;
@@ -691,32 +698,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
if (*cur != 0) {
/* MAC address */
- if ((delim = strchr(cur, ':')) == NULL)
- goto parse_failed;
- *delim = 0;
- np->remote_mac[0] = simple_strtol(cur, NULL, 16);
- cur = delim + 1;
- if ((delim = strchr(cur, ':')) == NULL)
+ if (!mac_pton(cur, np->remote_mac))
goto parse_failed;
- *delim = 0;
- np->remote_mac[1] = simple_strtol(cur, NULL, 16);
- cur = delim + 1;
- if ((delim = strchr(cur, ':')) == NULL)
- goto parse_failed;
- *delim = 0;
- np->remote_mac[2] = simple_strtol(cur, NULL, 16);
- cur = delim + 1;
- if ((delim = strchr(cur, ':')) == NULL)
- goto parse_failed;
- *delim = 0;
- np->remote_mac[3] = simple_strtol(cur, NULL, 16);
- cur = delim + 1;
- if ((delim = strchr(cur, ':')) == NULL)
- goto parse_failed;
- *delim = 0;
- np->remote_mac[4] = simple_strtol(cur, NULL, 16);
- cur = delim + 1;
- np->remote_mac[5] = simple_strtol(cur, NULL, 16);
}
netpoll_print_options(np);
@@ -925,7 +908,7 @@ void __netpoll_cleanup(struct netpoll *np)
skb_queue_purge(&npinfo->arp_tx);
skb_queue_purge(&npinfo->txq);
- cancel_rearming_delayed_work(&npinfo->tx_work);
+ cancel_delayed_work_sync(&npinfo->tx_work);
/* clean after last, unfinished work */
__skb_queue_purge(&npinfo->txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2c0df0f95b3d..67870e9fd097 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -156,6 +156,7 @@
#include <linux/wait.h>
#include <linux/etherdevice.h>
#include <linux/kthread.h>
+#include <linux/prefetch.h>
#include <net/net_namespace.h>
#include <net/checksum.h>
#include <net/ipv6.h>
@@ -251,6 +252,7 @@ struct pktgen_dev {
int max_pkt_size; /* = ETH_ZLEN; */
int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
int nfrags;
+ struct page *page;
u64 delay; /* nano-seconds */
__u64 count; /* Default No packets to send */
@@ -378,6 +380,7 @@ struct pktgen_dev {
u16 queue_map_min;
u16 queue_map_max;
+ __u32 skb_priority; /* skb priority field */
int node; /* Memory node */
#ifdef CONFIG_XFRM
@@ -394,6 +397,8 @@ struct pktgen_hdr {
__be32 tv_usec;
};
+static bool pktgen_exiting __read_mostly;
+
struct pktgen_thread {
spinlock_t if_lock; /* for list of devices */
struct list_head if_list; /* All device here */
@@ -445,7 +450,6 @@ static void pktgen_stop(struct pktgen_thread *t);
static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
static unsigned int scan_ip6(const char *s, char ip[16]);
-static unsigned int fmt_ip6(char *s, const char ip[16]);
/* Module parameters, defaults. */
static int pg_count_d __read_mostly = 1000;
@@ -547,22 +551,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
pkt_dev->queue_map_min,
pkt_dev->queue_map_max);
- if (pkt_dev->flags & F_IPV6) {
- char b1[128], b2[128], b3[128];
- fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
- fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr);
- fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr);
- seq_printf(seq,
- " saddr: %s min_saddr: %s max_saddr: %s\n", b1,
- b2, b3);
+ if (pkt_dev->skb_priority)
+ seq_printf(seq, " skb_priority: %u\n",
+ pkt_dev->skb_priority);
- fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr);
- fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr);
- fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr);
+ if (pkt_dev->flags & F_IPV6) {
seq_printf(seq,
- " daddr: %s min_daddr: %s max_daddr: %s\n", b1,
- b2, b3);
-
+ " saddr: %pI6c min_saddr: %pI6c max_saddr: %pI6c\n"
+ " daddr: %pI6c min_daddr: %pI6c max_daddr: %pI6c\n",
+ &pkt_dev->in6_saddr,
+ &pkt_dev->min_in6_saddr, &pkt_dev->max_in6_saddr,
+ &pkt_dev->in6_daddr,
+ &pkt_dev->min_in6_daddr, &pkt_dev->max_in6_daddr);
} else {
seq_printf(seq,
" dst_min: %s dst_max: %s\n",
@@ -698,10 +698,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
pkt_dev->cur_src_mac_offset);
if (pkt_dev->flags & F_IPV6) {
- char b1[128], b2[128];
- fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr);
- fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr);
- seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1);
+ seq_printf(seq, " cur_saddr: %pI6c cur_daddr: %pI6c\n",
+ &pkt_dev->cur_in6_saddr,
+ &pkt_dev->cur_in6_daddr);
} else
seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n",
pkt_dev->cur_saddr, pkt_dev->cur_daddr);
@@ -771,10 +770,10 @@ done:
static unsigned long num_arg(const char __user * user_buffer,
unsigned long maxlen, unsigned long *num)
{
- int i = 0;
+ int i;
*num = 0;
- for (; i < maxlen; i++) {
+ for (i = 0; i < maxlen; i++) {
char c;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
@@ -789,9 +788,9 @@ static unsigned long num_arg(const char __user * user_buffer,
static int strn_len(const char __user * user_buffer, unsigned int maxlen)
{
- int i = 0;
+ int i;
- for (; i < maxlen; i++) {
+ for (i = 0; i < maxlen; i++) {
char c;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
@@ -846,7 +845,7 @@ static ssize_t pktgen_if_write(struct file *file,
{
struct seq_file *seq = file->private_data;
struct pktgen_dev *pkt_dev = seq->private;
- int i = 0, max, len;
+ int i, max, len;
char name[16], valstr[32];
unsigned long value = 0;
char *pg_result = NULL;
@@ -860,13 +859,13 @@ static ssize_t pktgen_if_write(struct file *file,
return -EINVAL;
}
- max = count - i;
- tmp = count_trail_chars(&user_buffer[i], max);
+ max = count;
+ tmp = count_trail_chars(user_buffer, max);
if (tmp < 0) {
pr_warning("illegal format\n");
return tmp;
}
- i += tmp;
+ i = tmp;
/* Read variable name */
@@ -887,10 +886,11 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- char tb[count + 1];
- if (copy_from_user(tb, user_buffer, count))
+ size_t copy = min_t(size_t, count, 1023);
+ char tb[copy + 1];
+ if (copy_from_user(tb, user_buffer, copy))
return -EFAULT;
- tb[count] = 0;
+ tb[copy] = 0;
printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name,
(unsigned long)count, tb);
}
@@ -1126,6 +1126,10 @@ static ssize_t pktgen_if_write(struct file *file,
if (node_possible(value)) {
pkt_dev->node = value;
sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+ if (pkt_dev->page) {
+ put_page(pkt_dev->page);
+ pkt_dev->page = NULL;
+ }
}
else
sprintf(pg_result, "ERROR: node not possible");
@@ -1296,7 +1300,7 @@ static ssize_t pktgen_if_write(struct file *file,
buf[len] = 0;
scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
- fmt_ip6(buf, pkt_dev->in6_daddr.s6_addr);
+ snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
@@ -1319,7 +1323,7 @@ static ssize_t pktgen_if_write(struct file *file,
buf[len] = 0;
scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
- fmt_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
+ snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
ipv6_addr_copy(&pkt_dev->cur_in6_daddr,
&pkt_dev->min_in6_daddr);
@@ -1342,7 +1346,7 @@ static ssize_t pktgen_if_write(struct file *file,
buf[len] = 0;
scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr);
- fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr);
+ snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr);
if (debug)
printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf);
@@ -1363,7 +1367,7 @@ static ssize_t pktgen_if_write(struct file *file,
buf[len] = 0;
scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
- fmt_ip6(buf, pkt_dev->in6_saddr.s6_addr);
+ snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
@@ -1417,11 +1421,6 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
if (!strcmp(name, "dst_mac")) {
- char *v = valstr;
- unsigned char old_dmac[ETH_ALEN];
- unsigned char *m = pkt_dev->dst_mac;
- memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN);
-
len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
if (len < 0)
return len;
@@ -1429,35 +1428,16 @@ static ssize_t pktgen_if_write(struct file *file,
memset(valstr, 0, sizeof(valstr));
if (copy_from_user(valstr, &user_buffer[i], len))
return -EFAULT;
- i += len;
-
- for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) {
- int value;
-
- value = hex_to_bin(*v);
- if (value >= 0)
- *m = *m * 16 + value;
-
- if (*v == ':') {
- m++;
- *m = 0;
- }
- }
+ if (!mac_pton(valstr, pkt_dev->dst_mac))
+ return -EINVAL;
/* Set up Dest MAC */
- if (compare_ether_addr(old_dmac, pkt_dev->dst_mac))
- memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN);
+ memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN);
- sprintf(pg_result, "OK: dstmac");
+ sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac);
return count;
}
if (!strcmp(name, "src_mac")) {
- char *v = valstr;
- unsigned char old_smac[ETH_ALEN];
- unsigned char *m = pkt_dev->src_mac;
-
- memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN);
-
len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
if (len < 0)
return len;
@@ -1465,26 +1445,13 @@ static ssize_t pktgen_if_write(struct file *file,
memset(valstr, 0, sizeof(valstr));
if (copy_from_user(valstr, &user_buffer[i], len))
return -EFAULT;
- i += len;
-
- for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) {
- int value;
-
- value = hex_to_bin(*v);
- if (value >= 0)
- *m = *m * 16 + value;
-
- if (*v == ':') {
- m++;
- *m = 0;
- }
- }
+ if (!mac_pton(valstr, pkt_dev->src_mac))
+ return -EINVAL;
/* Set up Src MAC */
- if (compare_ether_addr(old_smac, pkt_dev->src_mac))
- memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN);
+ memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN);
- sprintf(pg_result, "OK: srcmac");
+ sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac);
return count;
}
@@ -1710,6 +1677,18 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
+ if (!strcmp(name, "skb_priority")) {
+ len = num_arg(&user_buffer[i], 9, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+ pkt_dev->skb_priority = value;
+ sprintf(pg_result, "OK: skb_priority=%i",
+ pkt_dev->skb_priority);
+ return count;
+ }
+
sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
return -EINVAL;
}
@@ -1764,7 +1743,7 @@ static ssize_t pktgen_thread_write(struct file *file,
{
struct seq_file *seq = file->private_data;
struct pktgen_thread *t = seq->private;
- int i = 0, max, len, ret;
+ int i, max, len, ret;
char name[40];
char *pg_result;
@@ -1773,12 +1752,12 @@ static ssize_t pktgen_thread_write(struct file *file,
return -EINVAL;
}
- max = count - i;
- len = count_trail_chars(&user_buffer[i], max);
+ max = count;
+ len = count_trail_chars(user_buffer, max);
if (len < 0)
return len;
- i += len;
+ i = len;
/* Read variable name */
@@ -1975,7 +1954,7 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
const char *ifname)
{
char b[IFNAMSIZ+5];
- int i = 0;
+ int i;
for (i = 0; ifname[i] != '@'; i++) {
if (i == IFNAMSIZ)
@@ -2489,7 +2468,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
{
struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
int err = 0;
- struct iphdr *iph;
if (!x)
return 0;
@@ -2499,7 +2477,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
return 0;
spin_lock(&x->lock);
- iph = ip_hdr(skb);
err = x->outer_mode->output(x, skb);
if (err)
@@ -2519,8 +2496,8 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
{
if (pkt_dev->cflows) {
/* let go of the SAs if we have them */
- int i = 0;
- for (; i < pkt_dev->cflows; i++) {
+ int i;
+ for (i = 0; i < pkt_dev->cflows; i++) {
struct xfrm_state *x = pkt_dev->flows[i].x;
if (x) {
xfrm_state_put(x);
@@ -2585,6 +2562,72 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
return htons(id | (cfi << 12) | (prio << 13));
}
+static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
+ int datalen)
+{
+ struct timeval timestamp;
+ struct pktgen_hdr *pgh;
+
+ pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
+ datalen -= sizeof(*pgh);
+
+ if (pkt_dev->nfrags <= 0) {
+ memset(skb_put(skb, datalen), 0, datalen);
+ } else {
+ int frags = pkt_dev->nfrags;
+ int i, len;
+ int frag_len;
+
+
+ if (frags > MAX_SKB_FRAGS)
+ frags = MAX_SKB_FRAGS;
+ len = datalen - frags * PAGE_SIZE;
+ if (len > 0) {
+ memset(skb_put(skb, len), 0, len);
+ datalen = frags * PAGE_SIZE;
+ }
+
+ i = 0;
+ frag_len = (datalen/frags) < PAGE_SIZE ?
+ (datalen/frags) : PAGE_SIZE;
+ while (datalen > 0) {
+ if (unlikely(!pkt_dev->page)) {
+ int node = numa_node_id();
+
+ if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE))
+ node = pkt_dev->node;
+ pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ if (!pkt_dev->page)
+ break;
+ }
+ skb_shinfo(skb)->frags[i].page = pkt_dev->page;
+ get_page(pkt_dev->page);
+ skb_shinfo(skb)->frags[i].page_offset = 0;
+ /*last fragment, fill rest of data*/
+ if (i == (frags - 1))
+ skb_shinfo(skb)->frags[i].size =
+ (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+ else
+ skb_shinfo(skb)->frags[i].size = frag_len;
+ datalen -= skb_shinfo(skb)->frags[i].size;
+ skb->len += skb_shinfo(skb)->frags[i].size;
+ skb->data_len += skb_shinfo(skb)->frags[i].size;
+ i++;
+ skb_shinfo(skb)->nr_frags = i;
+ }
+ }
+
+ /* Stamp the time, and sequence number,
+ * convert them to network byte order
+ */
+ pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+ pgh->seq_num = htonl(pkt_dev->seq_num);
+
+ do_gettimeofday(&timestamp);
+ pgh->tv_sec = htonl(timestamp.tv_sec);
+ pgh->tv_usec = htonl(timestamp.tv_usec);
+}
+
static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
struct pktgen_dev *pkt_dev)
{
@@ -2593,7 +2636,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
struct udphdr *udph;
int datalen, iplen;
struct iphdr *iph;
- struct pktgen_hdr *pgh = NULL;
__be16 protocol = htons(ETH_P_IP);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
@@ -2611,8 +2653,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2640,6 +2682,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
sprintf(pkt_dev->result, "No memory");
return NULL;
}
+ prefetchw(skb->data);
skb_reserve(skb, datalen);
@@ -2670,6 +2713,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->transport_header = skb->network_header + sizeof(struct iphdr);
skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
skb_set_queue_mapping(skb, queue_map);
+ skb->priority = pkt_dev->skb_priority;
+
iph = ip_hdr(skb);
udph = udp_hdr(skb);
@@ -2706,76 +2751,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
pkt_dev->pkt_overhead);
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
-
- if (pkt_dev->nfrags <= 0) {
- pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
- memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr));
- } else {
- int frags = pkt_dev->nfrags;
- int i, len;
-
- pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
- if (frags > MAX_SKB_FRAGS)
- frags = MAX_SKB_FRAGS;
- if (datalen > frags * PAGE_SIZE) {
- len = datalen - frags * PAGE_SIZE;
- memset(skb_put(skb, len), 0, len);
- datalen = frags * PAGE_SIZE;
- }
-
- i = 0;
- while (datalen > 0) {
- struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
- skb_shinfo(skb)->frags[i].page = page;
- skb_shinfo(skb)->frags[i].page_offset = 0;
- skb_shinfo(skb)->frags[i].size =
- (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
- datalen -= skb_shinfo(skb)->frags[i].size;
- skb->len += skb_shinfo(skb)->frags[i].size;
- skb->data_len += skb_shinfo(skb)->frags[i].size;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
-
- while (i < frags) {
- int rem;
-
- if (i == 0)
- break;
-
- rem = skb_shinfo(skb)->frags[i - 1].size / 2;
- if (rem == 0)
- break;
-
- skb_shinfo(skb)->frags[i - 1].size -= rem;
-
- skb_shinfo(skb)->frags[i] =
- skb_shinfo(skb)->frags[i - 1];
- get_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->frags[i].page =
- skb_shinfo(skb)->frags[i - 1].page;
- skb_shinfo(skb)->frags[i].page_offset +=
- skb_shinfo(skb)->frags[i - 1].size;
- skb_shinfo(skb)->frags[i].size = rem;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
- }
-
- /* Stamp the time, and sequence number,
- * convert them to network byte order
- */
- if (pgh) {
- struct timeval timestamp;
-
- pgh->pgh_magic = htonl(PKTGEN_MAGIC);
- pgh->seq_num = htonl(pkt_dev->seq_num);
-
- do_gettimeofday(&timestamp);
- pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
- }
+ pktgen_finalize_skb(pkt_dev, skb, datalen);
#ifdef CONFIG_XFRM
if (!process_ipsec(pkt_dev, skb, protocol))
@@ -2876,79 +2852,6 @@ static unsigned int scan_ip6(const char *s, char ip[16])
return len;
}
-static char tohex(char hexdigit)
-{
- return hexdigit > 9 ? hexdigit + 'a' - 10 : hexdigit + '0';
-}
-
-static int fmt_xlong(char *s, unsigned int i)
-{
- char *bak = s;
- *s = tohex((i >> 12) & 0xf);
- if (s != bak || *s != '0')
- ++s;
- *s = tohex((i >> 8) & 0xf);
- if (s != bak || *s != '0')
- ++s;
- *s = tohex((i >> 4) & 0xf);
- if (s != bak || *s != '0')
- ++s;
- *s = tohex(i & 0xf);
- return s - bak + 1;
-}
-
-static unsigned int fmt_ip6(char *s, const char ip[16])
-{
- unsigned int len;
- unsigned int i;
- unsigned int temp;
- unsigned int compressing;
- int j;
-
- len = 0;
- compressing = 0;
- for (j = 0; j < 16; j += 2) {
-
-#ifdef V4MAPPEDPREFIX
- if (j == 12 && !memcmp(ip, V4mappedprefix, 12)) {
- inet_ntoa_r(*(struct in_addr *)(ip + 12), s);
- temp = strlen(s);
- return len + temp;
- }
-#endif
- temp = ((unsigned long)(unsigned char)ip[j] << 8) +
- (unsigned long)(unsigned char)ip[j + 1];
- if (temp == 0) {
- if (!compressing) {
- compressing = 1;
- if (j == 0) {
- *s++ = ':';
- ++len;
- }
- }
- } else {
- if (compressing) {
- compressing = 0;
- *s++ = ':';
- ++len;
- }
- i = fmt_xlong(s, temp);
- len += i;
- s += i;
- if (j < 14) {
- *s++ = ':';
- ++len;
- }
- }
- }
- if (compressing) {
- *s++ = ':';
- ++len;
- }
- *s = 0;
- return len;
-}
-
static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
struct pktgen_dev *pkt_dev)
{
@@ -2957,7 +2860,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
struct udphdr *udph;
int datalen;
struct ipv6hdr *iph;
- struct pktgen_hdr *pgh = NULL;
__be16 protocol = htons(ETH_P_IPV6);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
@@ -2975,8 +2877,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
skb = __netdev_alloc_skb(odev,
pkt_dev->cur_pkt_size + 64
@@ -2985,6 +2887,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
sprintf(pkt_dev->result, "No memory");
return NULL;
}
+ prefetchw(skb->data);
skb_reserve(skb, 16);
@@ -3015,6 +2918,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
skb_set_queue_mapping(skb, queue_map);
+ skb->priority = pkt_dev->skb_priority;
iph = ipv6_hdr(skb);
udph = udp_hdr(skb);
@@ -3058,75 +2962,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
- if (pkt_dev->nfrags <= 0)
- pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
- else {
- int frags = pkt_dev->nfrags;
- int i;
-
- pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
- if (frags > MAX_SKB_FRAGS)
- frags = MAX_SKB_FRAGS;
- if (datalen > frags * PAGE_SIZE) {
- skb_put(skb, datalen - frags * PAGE_SIZE);
- datalen = frags * PAGE_SIZE;
- }
-
- i = 0;
- while (datalen > 0) {
- struct page *page = alloc_pages(GFP_KERNEL, 0);
- skb_shinfo(skb)->frags[i].page = page;
- skb_shinfo(skb)->frags[i].page_offset = 0;
- skb_shinfo(skb)->frags[i].size =
- (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
- datalen -= skb_shinfo(skb)->frags[i].size;
- skb->len += skb_shinfo(skb)->frags[i].size;
- skb->data_len += skb_shinfo(skb)->frags[i].size;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
-
- while (i < frags) {
- int rem;
-
- if (i == 0)
- break;
-
- rem = skb_shinfo(skb)->frags[i - 1].size / 2;
- if (rem == 0)
- break;
-
- skb_shinfo(skb)->frags[i - 1].size -= rem;
-
- skb_shinfo(skb)->frags[i] =
- skb_shinfo(skb)->frags[i - 1];
- get_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->frags[i].page =
- skb_shinfo(skb)->frags[i - 1].page;
- skb_shinfo(skb)->frags[i].page_offset +=
- skb_shinfo(skb)->frags[i - 1].size;
- skb_shinfo(skb)->frags[i].size = rem;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
- }
-
- /* Stamp the time, and sequence number,
- * convert them to network byte order
- * should we update cloned packets too ?
- */
- if (pgh) {
- struct timeval timestamp;
-
- pgh->pgh_magic = htonl(PKTGEN_MAGIC);
- pgh->seq_num = htonl(pkt_dev->seq_num);
-
- do_gettimeofday(&timestamp);
- pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
- }
- /* pkt_dev->seq_num++; FF: you really mean this? */
+ pktgen_finalize_skb(pkt_dev, skb, datalen);
return skb;
}
@@ -3296,7 +3132,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
pkt_dev->started_at);
ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
- p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n",
+ p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
(unsigned long long)ktime_to_us(elapsed),
(unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
(unsigned long long)ktime_to_us(idle),
@@ -3430,11 +3266,6 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
remove_proc_entry(t->tsk->comm, pg_proc_dir);
- mutex_lock(&pktgen_thread_lock);
-
- list_del(&t->th_list);
-
- mutex_unlock(&pktgen_thread_lock);
}
static void pktgen_resched(struct pktgen_dev *pkt_dev)
@@ -3509,7 +3340,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
__netif_tx_lock_bh(txq);
- if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) {
+ if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
ret = NETDEV_TX_BUSY;
pkt_dev->last_ok = 0;
goto unlock;
@@ -3533,8 +3364,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
break;
default: /* Drivers are not supposed to return other values! */
if (net_ratelimit())
- pr_info("pktgen: %s xmit error: %d\n",
- pkt_dev->odevname, ret);
+ pr_info("%s xmit error: %d\n", pkt_dev->odevname, ret);
pkt_dev->errors++;
/* fallthru */
case NETDEV_TX_LOCKED:
@@ -3581,6 +3411,8 @@ static int pktgen_thread_worker(void *arg)
pkt_dev = next_to_run(t);
if (unlikely(!pkt_dev && t->control == 0)) {
+ if (pktgen_exiting)
+ break;
wait_event_interruptible_timeout(t->queue,
t->control != 0,
HZ/10);
@@ -3633,6 +3465,13 @@ static int pktgen_thread_worker(void *arg)
pr_debug("%s removing thread\n", t->tsk->comm);
pktgen_rem_thread(t);
+ /* Wait for kthread_stop */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
return 0;
}
@@ -3784,7 +3623,10 @@ static int __init pktgen_create_thread(int cpu)
list_add_tail(&t->th_list, &pktgen_threads);
init_completion(&t->start_done);
- p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
+ p = kthread_create_on_node(pktgen_thread_worker,
+ t,
+ cpu_to_node(cpu),
+ "kpktgend_%d", cpu);
if (IS_ERR(p)) {
pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
list_del(&t->th_list);
@@ -3856,6 +3698,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
free_SAs(pkt_dev);
#endif
vfree(pkt_dev->flows);
+ if (pkt_dev->page)
+ put_page(pkt_dev->page);
kfree(pkt_dev);
return 0;
}
@@ -3907,6 +3751,7 @@ static void __exit pg_cleanup(void)
struct list_head *q, *n;
/* Stop all interfaces & threads */
+ pktgen_exiting = true;
list_for_each_safe(q, n, &pktgen_threads) {
t = list_entry(q, struct pktgen_thread, th_list);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7a..182236b2510a 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,6 +33,7 @@
* Note : Dont forget somaxconn that may limit backlog too.
*/
int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
int reqsk_queue_alloc(struct request_sock_queue *queue,
unsigned int nr_table_entries)
@@ -45,9 +46,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
lopt_size += nr_table_entries * sizeof(struct request_sock *);
if (lopt_size > PAGE_SIZE)
- lopt = __vmalloc(lopt_size,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ lopt = vzalloc(lopt_size);
else
lopt = kzalloc(lopt_size, GFP_KERNEL);
if (lopt == NULL)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8121268ddbdd..d2ba2597c75a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -196,7 +196,7 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
* as failure of this function is very unlikely, it can only happen due
* to lack of memory when allocating the chain to store all message
* handlers for a protocol. Meant for use in init functions where lack
- * of memory implies no sense in continueing.
+ * of memory implies no sense in continuing.
*/
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit)
@@ -347,16 +347,106 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
if (!ops)
return 0;
- size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
- nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
+ size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+ nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
if (ops->get_size)
/* IFLA_INFO_DATA + nested data */
- size += nlmsg_total_size(sizeof(struct nlattr)) +
+ size += nla_total_size(sizeof(struct nlattr)) +
ops->get_size(dev);
if (ops->get_xstats_size)
- size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */
+ /* IFLA_INFO_XSTATS */
+ size += nla_total_size(ops->get_xstats_size(dev));
+
+ return size;
+}
+
+static LIST_HEAD(rtnl_af_ops);
+
+static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+{
+ const struct rtnl_af_ops *ops;
+
+ list_for_each_entry(ops, &rtnl_af_ops, list) {
+ if (ops->family == family)
+ return ops;
+ }
+
+ return NULL;
+}
+
+/**
+ * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * The caller must hold the rtnl_mutex.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_af_register(struct rtnl_af_ops *ops)
+{
+ list_add_tail(&ops->list, &rtnl_af_ops);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_register);
+
+/**
+ * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_af_register(struct rtnl_af_ops *ops)
+{
+ int err;
+
+ rtnl_lock();
+ err = __rtnl_af_register(ops);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL_GPL(rtnl_af_register);
+
+/**
+ * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void __rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+ list_del(&ops->list);
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
+
+/**
+ * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ */
+void rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+ rtnl_lock();
+ __rtnl_af_unregister(ops);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_af_unregister);
+
+static size_t rtnl_link_get_af_size(const struct net_device *dev)
+{
+ struct rtnl_af_ops *af_ops;
+ size_t size;
+
+ /* IFLA_AF_SPEC */
+ size = nla_total_size(sizeof(struct nlattr));
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->get_link_af_size) {
+ /* AF_* + nested data */
+ size += nla_total_size(sizeof(struct nlattr)) +
+ af_ops->get_link_af_size(dev);
+ }
+ }
return size;
}
@@ -670,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
+ nla_total_size(4) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+ rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
- + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -756,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct nlmsghdr *nlh;
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats;
- struct nlattr *attr;
+ struct nlattr *attr, *af_spec;
+ struct rtnl_af_ops *af_ops;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
if (nlh == NULL)
@@ -776,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+ NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
if (dev->ifindex != dev->iflink)
NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -865,6 +958,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
+ if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+ goto nla_put_failure;
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->fill_link_af) {
+ struct nlattr *af;
+ int err;
+
+ if (!(af = nla_nest_start(skb, af_ops->family)))
+ goto nla_put_failure;
+
+ err = af_ops->fill_link_af(skb, dev);
+
+ /*
+ * Caller may return ENODATA to indicate that there
+ * was no data to be dumped. This is not an error, it
+ * means we should trim the attribute header and
+ * continue.
+ */
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, af);
+ }
+ }
+
+ nla_nest_end(skb, af_spec);
+
return nlmsg_end(skb, nlh);
nla_put_failure:
@@ -884,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
s_h = cb->args[0];
s_idx = cb->args[1];
+ rcu_read_lock();
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
- hlist_for_each_entry(dev, node, head, index_hlist) {
+ hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
if (idx < s_idx)
goto cont;
if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -900,6 +1024,7 @@ cont:
}
}
out:
+ rcu_read_unlock();
cb->args[1] = idx;
cb->args[0] = h;
@@ -913,6 +1038,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) },
[IFLA_MTU] = { .type = NLA_U32 },
[IFLA_LINK] = { .type = NLA_U32 },
+ [IFLA_MASTER] = { .type = NLA_U32 },
[IFLA_TXQLEN] = { .type = NLA_U32 },
[IFLA_WEIGHT] = { .type = NLA_U32 },
[IFLA_OPERSTATE] = { .type = NLA_U8 },
@@ -923,6 +1049,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
[IFLA_PORT_SELF] = { .type = NLA_NESTED },
+ [IFLA_AF_SPEC] = { .type = NLA_NESTED },
};
EXPORT_SYMBOL(ifla_policy);
@@ -984,6 +1111,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
return -EINVAL;
}
+ if (tb[IFLA_AF_SPEC]) {
+ struct nlattr *af;
+ int rem, err;
+
+ nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+ const struct rtnl_af_ops *af_ops;
+
+ if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ return -EAFNOSUPPORT;
+
+ if (!af_ops->set_link_af)
+ return -EOPNOTSUPP;
+
+ if (af_ops->validate_link_af) {
+ err = af_ops->validate_link_af(dev, af);
+ if (err < 0)
+ return err;
+ }
+ }
+ }
+
return 0;
}
@@ -1033,6 +1181,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
return err;
}
+static int do_set_master(struct net_device *dev, int ifindex)
+{
+ struct net_device *master_dev;
+ const struct net_device_ops *ops;
+ int err;
+
+ if (dev->master) {
+ if (dev->master->ifindex == ifindex)
+ return 0;
+ ops = dev->master->netdev_ops;
+ if (ops->ndo_del_slave) {
+ err = ops->ndo_del_slave(dev->master, dev);
+ if (err)
+ return err;
+ } else {
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (ifindex) {
+ master_dev = __dev_get_by_index(dev_net(dev), ifindex);
+ if (!master_dev)
+ return -EINVAL;
+ ops = master_dev->netdev_ops;
+ if (ops->ndo_add_slave) {
+ err = ops->ndo_add_slave(master_dev, dev);
+ if (err)
+ return err;
+ } else {
+ return -EOPNOTSUPP;
+ }
+ }
+ return 0;
+}
+
static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
struct nlattr **tb, char *ifname, int modified)
{
@@ -1120,6 +1303,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
modified = 1;
}
+ if (tb[IFLA_GROUP]) {
+ dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+ modified = 1;
+ }
+
/*
* Interface selected by interface index but interface
* name provided implies that a name change has been
@@ -1151,6 +1339,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
goto errout;
}
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ if (err)
+ goto errout;
+ modified = 1;
+ }
+
if (tb[IFLA_TXQLEN])
dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
@@ -1224,12 +1419,30 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
goto errout;
modified = 1;
}
+
+ if (tb[IFLA_AF_SPEC]) {
+ struct nlattr *af;
+ int rem;
+
+ nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+ const struct rtnl_af_ops *af_ops;
+
+ if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ BUG();
+
+ err = af_ops->set_link_af(dev, af);
+ if (err < 0)
+ goto errout;
+
+ modified = 1;
+ }
+ }
err = 0;
errout:
if (err < 0 && modified && net_ratelimit())
printk(KERN_WARNING "A link change request failed with "
- "some changes comitted already. Interface %s may "
+ "some changes committed already. Interface %s may "
"have been left with an inconsistent configuration, "
"please check.\n", dev->name);
@@ -1288,6 +1501,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
+ LIST_HEAD(list_kill);
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
if (err < 0)
@@ -1311,7 +1525,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (!ops)
return -EOPNOTSUPP;
- ops->dellink(dev, NULL);
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ list_del(&list_kill);
return 0;
}
@@ -1359,12 +1575,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
dev->real_num_tx_queues = real_num_queues;
- if (strchr(dev->name, '%')) {
- err = dev_alloc_name(dev, dev->name);
- if (err < 0)
- goto err_free;
- }
-
if (tb[IFLA_MTU])
dev->mtu = nla_get_u32(tb[IFLA_MTU]);
if (tb[IFLA_ADDRESS])
@@ -1379,16 +1589,34 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
if (tb[IFLA_LINKMODE])
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+ if (tb[IFLA_GROUP])
+ dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
return dev;
-err_free:
- free_netdev(dev);
err:
return ERR_PTR(err);
}
EXPORT_SYMBOL(rtnl_create_link);
+static int rtnl_group_changelink(struct net *net, int group,
+ struct ifinfomsg *ifm,
+ struct nlattr **tb)
+{
+ struct net_device *dev;
+ int err;
+
+ for_each_netdev(net, dev) {
+ if (dev->group == group) {
+ err = do_setlink(dev, ifm, tb, NULL, 0);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct net *net = sock_net(skb->sk);
@@ -1416,10 +1644,12 @@ replay:
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
- else if (ifname[0])
- dev = __dev_get_by_name(net, ifname);
- else
- dev = NULL;
+ else {
+ if (ifname[0])
+ dev = __dev_get_by_name(net, ifname);
+ else
+ dev = NULL;
+ }
err = validate_linkmsg(dev, tb);
if (err < 0)
@@ -1483,8 +1713,13 @@ replay:
return do_setlink(dev, ifm, tb, ifname, modified);
}
- if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+ return rtnl_group_changelink(net,
+ nla_get_u32(tb[IFLA_GROUP]),
+ ifm, tb);
return -ENODEV;
+ }
if (ifm->ifi_index)
return -EOPNOTSUPP;
@@ -1509,6 +1744,9 @@ replay:
snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
dest_net = rtnl_link_get_net(net, tb);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+
dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
if (IS_ERR(dev))
@@ -1638,7 +1876,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
int min_len;
int family;
int type;
- int err;
type = nlh->nlmsg_type;
if (type > RTM_MAX)
@@ -1665,11 +1902,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (dumpit == NULL)
return -EOPNOTSUPP;
- __rtnl_unlock();
rtnl = net->rtnl;
- err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
- rtnl_lock();
- return err;
+ return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
}
memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
@@ -1739,7 +1973,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
{
struct sock *sk;
sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
- rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
+ rtnetlink_rcv, NULL, THIS_MODULE);
if (!sk)
return -ENOMEM;
net->rtnl = sk;
diff --git a/net/core/scm.c b/net/core/scm.c
index 413cab89017d..4c1ef026d695 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
+ fpl->max = SCM_MAX_FD;
}
fpp = &fpl->fp[fpl->count];
- if (fpl->count + num > SCM_MAX_FD)
+ if (fpl->count + num > fpl->max)
return -EINVAL;
/*
@@ -94,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
int fd = fdp[i];
struct file *file;
- if (fd < 0 || !(file = fget(fd)))
+ if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
*fpp++ = file;
fpl->count++;
@@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
if (!fpl)
return NULL;
- new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+ new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
+ GFP_KERNEL);
if (new_fpl) {
- for (i=fpl->count-1; i>=0; i--)
+ for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
- memcpy(new_fpl, fpl, sizeof(*fpl));
+ new_fpl->max = new_fpl->count;
}
return new_fpl;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f8444754a..46cbd28f40f9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -57,6 +57,7 @@
#include <linux/init.h>
#include <linux/scatterlist.h>
#include <linux/errqueue.h>
+#include <linux/prefetch.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -210,6 +211,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
+ kmemcheck_annotate_variable(shinfo->destructor_arg);
if (fclone) {
struct sk_buff *child = skb + 1;
@@ -380,6 +382,8 @@ static void skb_release_head_state(struct sk_buff *skb)
}
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(skb->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
nf_conntrack_put_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
@@ -520,7 +524,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->ip_summed = old->ip_summed;
skb_copy_queue_mapping(new, old);
new->priority = old->priority;
- new->deliver_no_wcard = old->deliver_no_wcard;
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new->ipvs_property = old->ipvs_property;
#endif
@@ -778,6 +781,28 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
size = SKB_DATA_ALIGN(size);
+ /* Check if we can avoid taking references on fragments if we own
+ * the last reference on skb->head. (see skb_release_data())
+ */
+ if (!skb->cloned)
+ fastpath = true;
+ else {
+ int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+
+ fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+ }
+
+ if (fastpath &&
+ size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
+ memmove(skb->head + size, skb_shinfo(skb),
+ offsetof(struct skb_shared_info,
+ frags[skb_shinfo(skb)->nr_frags]));
+ memmove(skb->head + nhead, skb->head,
+ skb_tail_pointer(skb) - skb->head);
+ off = nhead;
+ goto adjust_others;
+ }
+
data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
if (!data)
goto nodata;
@@ -791,17 +816,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb_shinfo(skb),
offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
- /* Check if we can avoid taking references on fragments if we own
- * the last reference on skb->head. (see skb_release_data())
- */
- if (!skb->cloned)
- fastpath = true;
- else {
- int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-
- fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
- }
-
if (fastpath) {
kfree(skb->head);
} else {
@@ -816,6 +830,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
off = (data + nhead) - skb->head;
skb->head = data;
+adjust_others:
skb->data += off;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->end = size;
@@ -1812,7 +1827,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
long csstart;
if (skb->ip_summed == CHECKSUM_PARTIAL)
- csstart = skb->csum_start - skb_headroom(skb);
+ csstart = skb_checksum_start_offset(skb);
else
csstart = skb_headlen(skb);
@@ -2253,7 +2268,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
* of bytes already consumed and the next call to
* skb_seq_read() will return the remaining part of the block.
*
- * Note 1: The size of each block of data returned can be arbitary,
+ * Note 1: The size of each block of data returned can be arbitrary,
* this limitation is the cost for zerocopy seqeuental
* reads of potentially non linear data.
*
@@ -2419,8 +2434,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
return -ENOMEM;
/* initialize the next frag */
- sk->sk_sndmsg_page = page;
- sk->sk_sndmsg_off = 0;
skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
skb->truesize += PAGE_SIZE;
atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
@@ -2440,7 +2453,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
return -EFAULT;
/* copy was successful so update the size parameters */
- sk->sk_sndmsg_off += copy;
frag->size += copy;
skb->len += copy;
skb->data_len += copy;
@@ -2483,7 +2495,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
@@ -2493,7 +2505,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
unsigned int offset = doffset;
unsigned int headroom;
unsigned int len;
- int sg = features & NETIF_F_SG;
+ int sg = !!(features & NETIF_F_SG);
int nfrags = skb_shinfo(skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
@@ -2730,8 +2742,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
merge:
if (offset > headlen) {
- skbinfo->frags[0].page_offset += offset - headlen;
- skbinfo->frags[0].size -= offset - headlen;
+ unsigned int eat = offset - headlen;
+
+ skbinfo->frags[0].page_offset += eat;
+ skbinfo->frags[0].size -= eat;
+ skb->data_len -= eat;
+ skb->len -= eat;
offset = headlen;
}
@@ -2978,6 +2994,9 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
skb->destructor = sock_rmem_free;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ /* before exiting rcu section, make sure dst is refcounted */
+ skb_dst_force(skb);
+
skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, skb->len);
diff --git a/net/core/sock.c b/net/core/sock.c
index 11db43632df8..6e819780c232 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -157,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
- "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
+ "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
"sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
@@ -173,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
- "slock-AF_IEEE802154", "slock-AF_CAIF" ,
+ "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
"slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
@@ -189,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-27" , "clock-28" , "clock-AF_CAN" ,
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
- "clock-AF_IEEE802154", "clock-AF_CAIF" ,
+ "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
"clock-AF_MAX"
};
@@ -215,7 +215,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-/* Maximal space eaten by iovec or ancilliary data plus some space */
+/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
@@ -992,23 +992,54 @@ static inline void sock_lock_init(struct sock *sk)
/*
* Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
* even temporarly, because of RCU lookups. sk_node should also be left as is.
+ * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
*/
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
- BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
- sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
- sizeof(osk->sk_tx_queue_mapping));
- memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
- osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
+ memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
+
+ memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+ osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
security_sk_clone(osk, nsk);
#endif
}
+/*
+ * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * un-modified. Special care is taken when initializing object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+ if (offsetof(struct sock, sk_node.next) != 0)
+ memset(sk, 0, offsetof(struct sock, sk_node.next));
+ memset(&sk->sk_node.pprev, 0,
+ size - offsetof(struct sock, sk_node.pprev));
+}
+
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
+{
+ unsigned long nulls1, nulls2;
+
+ nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
+ nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
+ if (nulls1 > nulls2)
+ swap(nulls1, nulls2);
+
+ if (nulls1 != 0)
+ memset((char *)sk, 0, nulls1);
+ memset((char *)sk + nulls1 + sizeof(void *), 0,
+ nulls2 - nulls1 - sizeof(void *));
+ memset((char *)sk + nulls2 + sizeof(void *), 0,
+ size - nulls2 - sizeof(void *));
+}
+EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
+
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
int family)
{
@@ -1021,19 +1052,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
if (!sk)
return sk;
if (priority & __GFP_ZERO) {
- /*
- * caches using SLAB_DESTROY_BY_RCU should let
- * sk_node.next un-modified. Special care is taken
- * when initializing object to zero.
- */
- if (offsetof(struct sock, sk_node.next) != 0)
- memset(sk, 0, offsetof(struct sock, sk_node.next));
- memset(&sk->sk_node.pprev, 0,
- prot->obj_size - offsetof(struct sock,
- sk_node.pprev));
+ if (prot->clear_sk)
+ prot->clear_sk(sk, prot->obj_size);
+ else
+ sk_prot_clear_nulls(sk, prot->obj_size);
}
- }
- else
+ } else
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
@@ -1151,7 +1175,7 @@ static void __sk_free(struct sock *sk)
void sk_free(struct sock *sk)
{
/*
- * We substract one from sk_wmem_alloc and can know if
+ * We subtract one from sk_wmem_alloc and can know if
* some packets are still in some tx queue.
* If not null, sock_wfree() will call __sk_free(sk) later
*/
@@ -1161,10 +1185,10 @@ void sk_free(struct sock *sk)
EXPORT_SYMBOL(sk_free);
/*
- * Last sock_put should drop referrence to sk->sk_net. It has already
- * been dropped in sk_change_net. Taking referrence to stopping namespace
+ * Last sock_put should drop reference to sk->sk_net. It has already
+ * been dropped in sk_change_net. Taking reference to stopping namespace
* is not an option.
- * Take referrence to a socket to remove it from hash _alive_ and after that
+ * Take reference to a socket to remove it from hash _alive_ and after that
* destroy it in the context of init_net.
*/
void sk_release_kernel(struct sock *sk)
@@ -1225,7 +1249,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
sock_reset_flag(newsk, SOCK_DONE);
skb_queue_head_init(&newsk->sk_error_queue);
- filter = newsk->sk_filter;
+ filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
sk_filter_charge(newsk, filter);
@@ -1653,10 +1677,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
- int allocated;
+ long allocated;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = atomic_add_return(amt, prot->memory_allocated);
+ allocated = atomic_long_add_return(amt, prot->memory_allocated);
/* Under limit. */
if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1738,7 @@ suppress_allocation:
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
- atomic_sub(amt, prot->memory_allocated);
+ atomic_long_sub(amt, prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1751,12 @@ void __sk_mem_reclaim(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
- atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+ atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
prot->memory_allocated);
sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
if (prot->memory_pressure && *prot->memory_pressure &&
- (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+ (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
*prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -1884,7 +1908,7 @@ static void sock_def_readable(struct sock *sk, int len)
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
- wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+ wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
@@ -2452,12 +2476,12 @@ static char proto_method_implemented(const void *method)
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
- seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
+ seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
sock_prot_inuse_get(seq_file_net(seq), proto),
- proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+ proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
proto->slab == NULL ? "no" : "yes",
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 01eee5d984be..a829e3f60aeb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -34,7 +34,8 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
mutex_lock(&sock_flow_mutex);
- orig_sock_table = rps_sock_flow_table;
+ orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+ lockdep_is_held(&sock_flow_mutex));
size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
@@ -121,6 +122,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+#ifdef CONFIG_BPF_JIT
+ {
+ .procname = "bpf_jit_enable",
+ .data = &bpf_jit_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+#endif
{
.procname = "netdev_tstamp_prequeue",
.data = &netdev_tstamp_prequeue,
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85b..7e7ca375d431 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -26,12 +26,12 @@ static struct sock_filter ptp_filter[] = {
PTP_FILTER
};
-static unsigned int classify(struct sk_buff *skb)
+static unsigned int classify(const struct sk_buff *skb)
{
if (likely(skb->dev &&
skb->dev->phydev &&
skb->dev->phydev->drv))
- return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+ return sk_run_filter(skb, ptp_filter);
else
return PTP_CLASS_NONE;
}
@@ -96,11 +96,13 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
struct phy_device *phydev;
unsigned int type;
- skb_push(skb, ETH_HLEN);
+ if (skb_headroom(skb) < ETH_HLEN)
+ return false;
+ __skb_push(skb, ETH_HLEN);
type = classify(skb);
- skb_pull(skb, ETH_HLEN);
+ __skb_pull(skb, ETH_HLEN);
switch (type) {
case PTP_CLASS_V1_IPV4:
diff --git a/net/core/utils.c b/net/core/utils.c
index 5fea0ab21902..2012bc797f9c 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -296,3 +296,27 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
csum_unfold(*sum)));
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
+
+int mac_pton(const char *s, u8 *mac)
+{
+ int i;
+
+ /* XX:XX:XX:XX:XX:XX */
+ if (strlen(s) < 3 * ETH_ALEN - 1)
+ return 0;
+
+ /* Don't dirty result unless string is valid MAC. */
+ for (i = 0; i < ETH_ALEN; i++) {
+ if (!strchr("0123456789abcdefABCDEF", s[i * 3]))
+ return 0;
+ if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1]))
+ return 0;
+ if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
+ return 0;
+ }
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]);
+ }
+ return 1;
+}
+EXPORT_SYMBOL(mac_pton);