From 23289a37e2b127dfc4de1313fba15bb4c9f0cd5b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 27 Oct 2009 07:06:36 +0000
Subject: net: add a list_head parameter to dellink() method

Adding a list_head parameter to rtnl_link_ops->dellink() methods
allow us to queue devices on a list, in order to dismantle
them all at once.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/veth.c')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ade5b344f75d..ffb502daa916 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -442,7 +442,7 @@ err_register_peer:
 	return err;
 }
 
-static void veth_dellink(struct net_device *dev)
+static void veth_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct veth_priv *priv;
 	struct net_device *peer;
-- 
cgit v1.2.3


From 24540535d33f72505807be3e7ef2e94f3726f971 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 30 Oct 2009 01:00:27 -0700
Subject: veth: Fix veth_dellink method

In commit 23289a37e2b127dfc4de1313fba15bb4c9f0cd5b
(net: add a list_head parameter to dellink() method),
I forgot to actually use this parameter in veth_dellink.

I remember feeling a bit uncomfortable about veth_close(),
because it does :

netif_carrier_off(dev);
netif_carrier_off(priv->peer);

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/net/veth.c')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ffb502daa916..9bed694cd215 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -450,8 +450,8 @@ static void veth_dellink(struct net_device *dev, struct list_head *head)
 	priv = netdev_priv(dev);
 	peer = priv->peer;
 
-	unregister_netdevice(dev);
-	unregister_netdevice(peer);
+	unregister_netdevice_queue(dev, head);
+	unregister_netdevice_queue(peer, head);
 }
 
 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1];
-- 
cgit v1.2.3


From 81adee47dfb608df3ad0b91d230fb3cef75f0060 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Sun, 8 Nov 2009 00:53:51 -0800
Subject: net: Support specifying the network namespace upon device creation.

There is no good reason to not support userspace specifying the
network namespace during device creation, and it makes it easier
to create a network device and pass it to a child network namespace
with a well known name.

We have to be careful to ensure that the target network namespace
for the new device exists through the life of the call.  To keep
that logic clear I have factored out the network namespace grabbing
logic into rtnl_link_get_net.

In addtion we need to continue to pass the source network namespace
to the rtnl_link_ops.newlink method so that we can find the base
device source network namespace.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 drivers/net/can/dev.c    |  2 +-
 drivers/net/macvlan.c    |  4 ++--
 drivers/net/veth.c       | 15 ++++++++++++---
 include/net/rtnetlink.h  |  8 +++++---
 net/8021q/vlan_netlink.c |  4 ++--
 net/core/rtnetlink.c     | 38 +++++++++++++++++++++++++++-----------
 net/ipv4/ip_gre.c        |  2 +-
 7 files changed, 50 insertions(+), 23 deletions(-)

(limited to 'drivers/net/veth.c')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index c3db111d2ff5..5fe34d64ca2a 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -674,7 +674,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int can_newlink(struct net_device *dev,
+static int can_newlink(struct net *src_net, struct net_device *dev,
 		       struct nlattr *tb[], struct nlattr *data[])
 {
 	return -EOPNOTSUPP;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 20b7707f38ef..d7dba3f6f763 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -504,7 +504,7 @@ static int macvlan_get_tx_queues(struct net *net,
 	return 0;
 }
 
-static int macvlan_newlink(struct net_device *dev,
+static int macvlan_newlink(struct net *src_net, struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[])
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
@@ -515,7 +515,7 @@ static int macvlan_newlink(struct net_device *dev,
 	if (!tb[IFLA_LINK])
 		return -EINVAL;
 
-	lowerdev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK]));
+	lowerdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
 	if (lowerdev == NULL)
 		return -ENODEV;
 
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 9bed694cd215..2d657f2314cb 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -340,7 +340,7 @@ static int veth_validate(struct nlattr *tb[], struct nlattr *data[])
 
 static struct rtnl_link_ops veth_link_ops;
 
-static int veth_newlink(struct net_device *dev,
+static int veth_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
 	int err;
@@ -348,6 +348,7 @@ static int veth_newlink(struct net_device *dev,
 	struct veth_priv *priv;
 	char ifname[IFNAMSIZ];
 	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
+	struct net *net;
 
 	/*
 	 * create and register peer first
@@ -380,14 +381,22 @@ static int veth_newlink(struct net_device *dev,
 	else
 		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
 
-	peer = rtnl_create_link(dev_net(dev), ifname, &veth_link_ops, tbp);
-	if (IS_ERR(peer))
+	net = rtnl_link_get_net(src_net, tbp);
+	if (IS_ERR(net))
+		return PTR_ERR(net);
+
+	peer = rtnl_create_link(src_net, net, ifname, &veth_link_ops, tbp);
+	if (IS_ERR(peer)) {
+		put_net(net);
 		return PTR_ERR(peer);
+	}
 
 	if (tbp[IFLA_ADDRESS] == NULL)
 		random_ether_addr(peer->dev_addr);
 
 	err = register_netdevice(peer);
+	put_net(net);
+	net = NULL;
 	if (err < 0)
 		goto err_register_peer;
 
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index cd5af1f508f2..48d3efcb0880 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -55,7 +55,8 @@ struct rtnl_link_ops {
 	int			(*validate)(struct nlattr *tb[],
 					    struct nlattr *data[]);
 
-	int			(*newlink)(struct net_device *dev,
+	int			(*newlink)(struct net *src_net,
+					   struct net_device *dev,
 					   struct nlattr *tb[],
 					   struct nlattr *data[]);
 	int			(*changelink)(struct net_device *dev,
@@ -83,8 +84,9 @@ extern void	rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops);
 extern int	rtnl_link_register(struct rtnl_link_ops *ops);
 extern void	rtnl_link_unregister(struct rtnl_link_ops *ops);
 
-extern struct net_device *rtnl_create_link(struct net *net, char *ifname,
-		const struct rtnl_link_ops *ops, struct nlattr *tb[]);
+extern struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]);
+extern struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
+	char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]);
 extern const struct nla_policy ifla_policy[IFLA_MAX+1];
 
 #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a91504850195..3c9cf6a8e7fb 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -119,7 +119,7 @@ static int vlan_get_tx_queues(struct net *net,
 	return 0;
 }
 
-static int vlan_newlink(struct net_device *dev,
+static int vlan_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
@@ -131,7 +131,7 @@ static int vlan_newlink(struct net_device *dev,
 
 	if (!tb[IFLA_LINK])
 		return -EINVAL;
-	real_dev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK]));
+	real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
 	if (!real_dev)
 		return -ENODEV;
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e2f3317f290f..33148a568199 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -733,6 +733,20 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
 };
 
+struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+{
+	struct net *net;
+	/* Examine the link attributes and figure out which
+	 * network namespace we are talking about.
+	 */
+	if (tb[IFLA_NET_NS_PID])
+		net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+	else
+		net = get_net(src_net);
+	return net;
+}
+EXPORT_SYMBOL(rtnl_link_get_net);
+
 static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 {
 	if (dev) {
@@ -756,8 +770,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	int err;
 
 	if (tb[IFLA_NET_NS_PID]) {
-		struct net *net;
-		net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+		struct net *net = rtnl_link_get_net(dev_net(dev), tb);
 		if (IS_ERR(net)) {
 			err = PTR_ERR(net);
 			goto errout;
@@ -976,8 +989,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return 0;
 }
 
-struct net_device *rtnl_create_link(struct net *net, char *ifname,
-		const struct rtnl_link_ops *ops, struct nlattr *tb[])
+struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
+	char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
 {
 	int err;
 	struct net_device *dev;
@@ -985,7 +998,7 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname,
 	unsigned int real_num_queues = 1;
 
 	if (ops->get_tx_queues) {
-		err = ops->get_tx_queues(net, tb, &num_queues,
+		err = ops->get_tx_queues(src_net, tb, &num_queues,
 					 &real_num_queues);
 		if (err)
 			goto err;
@@ -995,16 +1008,16 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname,
 	if (!dev)
 		goto err;
 
+	dev_net_set(dev, net);
+	dev->rtnl_link_ops = ops;
 	dev->real_num_tx_queues = real_num_queues;
+
 	if (strchr(dev->name, '%')) {
 		err = dev_alloc_name(dev, dev->name);
 		if (err < 0)
 			goto err_free;
 	}
 
-	dev_net_set(dev, net);
-	dev->rtnl_link_ops = ops;
-
 	if (tb[IFLA_MTU])
 		dev->mtu = nla_get_u32(tb[IFLA_MTU]);
 	if (tb[IFLA_ADDRESS])
@@ -1083,6 +1096,7 @@ replay:
 
 	if (1) {
 		struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL;
+		struct net *dest_net;
 
 		if (ops) {
 			if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
@@ -1147,17 +1161,19 @@ replay:
 		if (!ifname[0])
 			snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
 
-		dev = rtnl_create_link(net, ifname, ops, tb);
+		dest_net = rtnl_link_get_net(net, tb);
+		dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
 
 		if (IS_ERR(dev))
 			err = PTR_ERR(dev);
 		else if (ops->newlink)
-			err = ops->newlink(dev, tb, data);
+			err = ops->newlink(net, dev, tb, data);
 		else
 			err = register_netdevice(dev);
-
 		if (err < 0 && !IS_ERR(dev))
 			free_netdev(dev);
+
+		put_net(dest_net);
 		return err;
 	}
 }
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 71a3242fb7d0..a7de9e3a8f18 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1483,7 +1483,7 @@ static void ipgre_tap_setup(struct net_device *dev)
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
-static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
+static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
 			 struct nlattr *data[])
 {
 	struct ip_tunnel *nt;
-- 
cgit v1.2.3


From 2b1c8b0f925c3f5943cf95d263d72927baae88e7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 18 Nov 2009 07:09:39 +0000
Subject: veth: Fix veth_get_stats()

veth_get_stats() can be called in parallel on several cpus.

It's better to not reset dev->stats as it could give wrong result on
one cpu. Use temporary variables, then store the final results.

Also, we should loop on every possible cpus, not only online cpus,
or cpu hotplug can suddenly give wrong veth stats.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

(limited to 'drivers/net/veth.c')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ade5b344f75d..52af5017c46b 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -210,32 +210,29 @@ rx_drop:
 static struct net_device_stats *veth_get_stats(struct net_device *dev)
 {
 	struct veth_priv *priv;
-	struct net_device_stats *dev_stats;
 	int cpu;
-	struct veth_net_stats *stats;
+	struct veth_net_stats *stats, total = {0};
 
 	priv = netdev_priv(dev);
-	dev_stats = &dev->stats;
-
-	dev_stats->rx_packets = 0;
-	dev_stats->tx_packets = 0;
-	dev_stats->rx_bytes = 0;
-	dev_stats->tx_bytes = 0;
-	dev_stats->tx_dropped = 0;
-	dev_stats->rx_dropped = 0;
 
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		stats = per_cpu_ptr(priv->stats, cpu);
 
-		dev_stats->rx_packets += stats->rx_packets;
-		dev_stats->tx_packets += stats->tx_packets;
-		dev_stats->rx_bytes += stats->rx_bytes;
-		dev_stats->tx_bytes += stats->tx_bytes;
-		dev_stats->tx_dropped += stats->tx_dropped;
-		dev_stats->rx_dropped += stats->rx_dropped;
+		total.rx_packets += stats->rx_packets;
+		total.tx_packets += stats->tx_packets;
+		total.rx_bytes   += stats->rx_bytes;
+		total.tx_bytes   += stats->tx_bytes;
+		total.tx_dropped += stats->tx_dropped;
+		total.rx_dropped += stats->rx_dropped;
 	}
-
-	return dev_stats;
+	dev->stats.rx_packets = total.rx_packets;
+	dev->stats.tx_packets = total.tx_packets;
+	dev->stats.rx_bytes   = total.rx_bytes;
+	dev->stats.tx_bytes   = total.tx_bytes;
+	dev->stats.tx_dropped = total.tx_dropped;
+	dev->stats.rx_dropped = total.rx_dropped;
+
+	return &dev->stats;
 }
 
 static int veth_open(struct net_device *dev)
-- 
cgit v1.2.3


From 445409602c09219767c06497c0dc2285eac244ed Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Nov 2009 06:07:08 +0000
Subject: veth: move loopback logic to common location

The veth driver contains code to forward an skb
from the start_xmit function of one network
device into the receive path of another device.

Moving that code into a common location lets us
reuse the code for direct forwarding of data
between macvlan ports, and possibly in other
drivers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c        | 17 +++--------------
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 14 deletions(-)

(limited to 'drivers/net/veth.c')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 2d657f2314cb..6c4b5a2d7877 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -155,8 +155,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct veth_net_stats *stats, *rcv_stats;
 	int length, cpu;
 
-	skb_orphan(skb);
-
 	priv = netdev_priv(dev);
 	rcv = priv->peer;
 	rcv_priv = netdev_priv(rcv);
@@ -168,20 +166,12 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!(rcv->flags & IFF_UP))
 		goto tx_drop;
 
-	if (skb->len > (rcv->mtu + MTU_PAD))
-		goto rx_drop;
-
-        skb->tstamp.tv64 = 0;
-	skb->pkt_type = PACKET_HOST;
-	skb->protocol = eth_type_trans(skb, rcv);
 	if (dev->features & NETIF_F_NO_CSUM)
 		skb->ip_summed = rcv_priv->ip_summed;
 
-	skb->mark = 0;
-	secpath_reset(skb);
-	nf_reset(skb);
-
-	length = skb->len;
+	length = skb->len + ETH_HLEN;
+	if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
+		goto rx_drop;
 
 	stats->tx_bytes += length;
 	stats->tx_packets++;
@@ -189,7 +179,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	rcv_stats->rx_bytes += length;
 	rcv_stats->rx_packets++;
 
-	netif_rx(skb);
 	return NETDEV_TX_OK;
 
 tx_drop:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97873e31661c..9428793775a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1562,6 +1562,8 @@ extern int		dev_set_mac_address(struct net_device *,
 extern int		dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev,
 					    struct netdev_queue *txq);
+extern int		dev_forward_skb(struct net_device *dev,
+					struct sk_buff *skb);
 
 extern int		netdev_budget;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e65af6041415..7775e8b48deb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -105,6 +105,7 @@
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/xfrm.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -1419,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb)
 		skb->tstamp.tv64 = 0;
 }
 
+/**
+ * dev_forward_skb - loopback an skb to another netif
+ *
+ * @dev: destination network device
+ * @skb: buffer to forward
+ *
+ * return values:
+ *	NET_RX_SUCCESS	(no congestion)
+ *	NET_RX_DROP     (packet was dropped)
+ *
+ * dev_forward_skb can be used for injecting an skb from the
+ * start_xmit function of one device into the receive queue
+ * of another device.
+ *
+ * The receiving device may be in another namespace, so
+ * we have to clear all information in the skb that could
+ * impact namespace isolation.
+ */
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	skb_orphan(skb);
+
+	if (!(dev->flags & IFF_UP))
+		return NET_RX_DROP;
+
+	if (skb->len > (dev->mtu + dev->hard_header_len))
+		return NET_RX_DROP;
+
+	skb_dst_drop(skb);
+	skb->tstamp.tv64 = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, dev);
+	skb->mark = 0;
+	secpath_reset(skb);
+	nf_reset(skb);
+	return netif_rx(skb);
+}
+EXPORT_SYMBOL_GPL(dev_forward_skb);
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
-- 
cgit v1.2.3