From e8a83e10d7dfe5d0841062780769b30f65417e15 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 7 Sep 2008 18:41:21 -0700 Subject: pkt_sched: Fix qdisc state in net_tx_action() net_tx_action() can skip __QDISC_STATE_SCHED bit clearing while qdisc is neither ran nor rescheduled, which may cause endless loop in dev_deactivate(). Reported-by: Denys Fedoryshchenko Tested-by: Denys Fedoryshchenko Signed-off-by: Jarek Poplawski Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 60c51f765887..e719ed29310f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1991,8 +1991,13 @@ static void net_tx_action(struct softirq_action *h) spin_unlock(root_lock); } else { if (!test_bit(__QDISC_STATE_DEACTIVATED, - &q->state)) + &q->state)) { __netif_reschedule(q); + } else { + smp_mb__before_clear_bit(); + clear_bit(__QDISC_STATE_SCHED, + &q->state); + } } } } -- cgit v1.2.3 From e2a6b85247aacc52d6ba0d9b37a99b8d1a3e0d83 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 8 Sep 2008 16:10:02 -0700 Subject: net: Enable TSO if supported by at least one device As it stands users of netdev_compute_features (e.g., bridges/bonding) will only enable TSO if all consituent devices support it. This is unnecessarily pessimistic since even on devices that do not support hardware TSO and SG, emulated TSO still performs to a par with TSO off. This patch enables TSO if at least on constituent device supports it in hardware. The direct beneficiaries will be virtualisation that uses bridging since this means that TSO will always be enabled for communication from the host to the guests. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 60c51f765887..abef86ec4cb0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4663,6 +4663,12 @@ int netdev_compute_features(unsigned long all, unsigned long one) one |= NETIF_F_GSO_SOFTWARE; one |= NETIF_F_GSO; + /* + * If even one device supports a GSO protocol with software fallback, + * enable it for all. + */ + all |= one & NETIF_F_GSO_SOFTWARE; + /* If even one device supports robust GSO, enable it for all. */ if (one & NETIF_F_GSO_ROBUST) all |= NETIF_F_GSO_ROBUST; -- cgit v1.2.3 From ad55dcaff0e34269f86975ce2ea0da22e9eb74a1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 20 Sep 2008 22:05:50 -0700 Subject: netdev: simple_tx_hash shouldn't hash inside fragments Currently simple_tx_hash is hashing inside of udp fragments. As a result packets are getting getting sent to all queues when they shouldn't be. This causes a serious performance regression which can be seen by sending UDP frames larger than mtu on multiqueue devices. This change will make it so that fragments are hashed only as IP datagrams w/o any protocol information. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index e719ed29310f..e8eb2b478344 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include @@ -1667,7 +1668,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) { u32 addr1, addr2, ports; u32 hash, ihl; - u8 ip_proto; + u8 ip_proto = 0; if (unlikely(!simple_tx_hashrnd_initialized)) { get_random_bytes(&simple_tx_hashrnd, 4); @@ -1676,7 +1677,8 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) switch (skb->protocol) { case __constant_htons(ETH_P_IP): - ip_proto = ip_hdr(skb)->protocol; + if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) + ip_proto = ip_hdr(skb)->protocol; addr1 = ip_hdr(skb)->saddr; addr2 = ip_hdr(skb)->daddr; ihl = ip_hdr(skb)->ihl; -- cgit v1.2.3 From 6067804047b64dde89f4f133fc7eba48ee44107d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 20 Sep 2008 22:20:49 -0700 Subject: net: Use hton[sl]() instead of __constant_hton[sl]() where applicable Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- net/atm/br2684.c | 8 ++++---- net/core/dev.c | 4 ++-- net/ethernet/eth.c | 2 +- net/ipv4/ipvs/ip_vs_core.c | 4 ++-- net/ipv6/ip6_tunnel.c | 4 ++-- net/mac80211/wme.c | 2 +- net/sched/cls_flow.c | 28 ++++++++++++++-------------- net/sched/sch_dsmark.c | 8 ++++---- net/sched/sch_sfq.c | 4 ++-- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++-- 11 files changed, 35 insertions(+), 35 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 97688cdb5501..8883e9c8a223 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -48,7 +48,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) switch (veth->h_vlan_encapsulated_proto) { #ifdef CONFIG_INET - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): /* TODO: Confirm this will work with VLAN headers... */ return arp_find(veth->h_dest, skb); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 8d9a6f158880..280de481edc7 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -375,11 +375,11 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) if (memcmp (skb->data + 6, ethertype_ipv6, sizeof(ethertype_ipv6)) == 0) - skb->protocol = __constant_htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); else if (memcmp (skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0) - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); else goto error; skb_pull(skb, sizeof(llc_oui_ipv4)); @@ -404,9 +404,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb_reset_network_header(skb); iph = ip_hdr(skb); if (iph->version == 4) - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); else if (iph->version == 6) - skb->protocol = __constant_htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); else goto error; skb->pkt_type = PACKET_HOST; diff --git a/net/core/dev.c b/net/core/dev.c index f48d1b24f9ce..fdfc4b6a6448 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1675,13 +1675,13 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) } switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): ip_proto = ip_hdr(skb)->protocol; addr1 = ip_hdr(skb)->saddr; addr2 = ip_hdr(skb)->daddr; ihl = ip_hdr(skb)->ihl; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ip_proto = ipv6_hdr(skb)->nexthdr; addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a80839b02e3f..647a9edee375 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -129,7 +129,7 @@ int eth_rebuild_header(struct sk_buff *skb) switch (eth->h_proto) { #ifdef CONFIG_INET - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return arp_find(eth->h_dest, skb); #endif default: diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index ece748dbd0cd..958abf3e5f8c 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -938,7 +938,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, EnterFunction(11); - af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6; + af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; if (skb->ipvs_property) return NF_ACCEPT; @@ -1258,7 +1258,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, struct ip_vs_conn *cp; int ret, restart, af; - af = (skb->protocol == __constant_htons(ETH_P_IP)) ? AF_INET : AF_INET6; + af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 17c7b098cdb0..64ce3d33d9c6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1050,10 +1050,10 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): ret = ip4ip6_tnl_xmit(skb, dev); break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ret = ip6ip6_tnl_xmit(skb, dev); break; default: diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 6748dedcab50..c703f8b44e92 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -39,7 +39,7 @@ static unsigned int classify_1d(struct sk_buff *skb) return skb->priority - 256; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): dscp = ip_hdr(skb)->tos & 0xfc; break; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 8f63a1a94014..0ebaff637e31 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -67,9 +67,9 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(ip_hdr(skb)->saddr); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]); default: return addr_fold(skb->sk); @@ -79,9 +79,9 @@ static u32 flow_get_src(const struct sk_buff *skb) static u32 flow_get_dst(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(ip_hdr(skb)->daddr); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); default: return addr_fold(skb->dst) ^ (__force u16)skb->protocol; @@ -91,9 +91,9 @@ static u32 flow_get_dst(const struct sk_buff *skb) static u32 flow_get_proto(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ip_hdr(skb)->protocol; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ipv6_hdr(skb)->nexthdr; default: return 0; @@ -120,7 +120,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb) u32 res = 0; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { struct iphdr *iph = ip_hdr(skb); if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -128,7 +128,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb) res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); if (has_ports(iph->nexthdr)) @@ -147,7 +147,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) u32 res = 0; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { struct iphdr *iph = ip_hdr(skb); if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -155,7 +155,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); if (has_ports(iph->nexthdr)) @@ -213,9 +213,9 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: @@ -225,9 +225,9 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index edd1298f85f6..ba43aab3a851 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -202,7 +202,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (p->set_tc_index) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) & ~INET_ECN_MASK; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): if (skb_cow_head(skb, sizeof(struct ipv6hdr))) goto drop; @@ -289,11 +289,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) pr_debug("index %d->%d\n", skb->tc_index, index); switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index], p->value[index]); break; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 6e041d10dbdb..fe1508ef0d3d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -119,7 +119,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) u32 h, h2; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): { const struct iphdr *iph = ip_hdr(skb); h = iph->daddr; @@ -134,7 +134,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) h2 ^= *(((u32*)iph) + iph->ihl); break; } - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); h = iph->daddr.s6_addr32[3]; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e55427f73dfe..5c1954d28d09 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -769,7 +769,7 @@ repost: /* check for expected message types */ /* The order of some of these tests is important. */ switch (headerp->rm_type) { - case __constant_htonl(RDMA_MSG): + case htonl(RDMA_MSG): /* never expect read chunks */ /* never expect reply chunks (two ways to check) */ /* never expect write chunks without having offered RDMA */ @@ -802,7 +802,7 @@ repost: rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); break; - case __constant_htonl(RDMA_NOMSG): + case htonl(RDMA_NOMSG): /* never expect read or write chunks, always reply chunks */ if (headerp->rm_body.rm_chunks[0] != xdr_zero || headerp->rm_body.rm_chunks[1] != xdr_zero || -- cgit v1.2.3 From 0b815a1a6d43ab498674b8430c8c35ab08487a16 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 22 Sep 2008 21:28:11 -0700 Subject: net: network device name ifalias support This patch add support for keeping an additional character alias associated with an network interface. This is useful for maintaining the SNMP ifAlias value which is a user defined value. Routers use this to hold information like which circuit or line it is connected to. It is just an arbitrary text label on the network device. There are two exposed interfaces with this patch, the value can be read/written either via netlink or sysfs. This could be maintained just by the snmp daemon, but it is more generally useful for other management tools, and the kernel is good place to act as an agreed upon interface to store it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/if.h | 1 + include/linux/if_link.h | 1 + include/linux/netdevice.h | 3 +++ net/core/dev.c | 23 +++++++++++++++++++++++ net/core/net-sysfs.c | 36 ++++++++++++++++++++++++++++++++++++ net/core/rtnetlink.c | 13 +++++++++++++ 6 files changed, 77 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/linux/if.h b/include/linux/if.h index 5c9d1fa93fef..65246846c844 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -24,6 +24,7 @@ #include /* for "__user" et al */ #define IFNAMSIZ 16 +#define IFALIASZ 256 #include /* Standard interface flags (netdevice->flags). */ diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 84c3492ae5cb..f9032c88716a 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -79,6 +79,7 @@ enum IFLA_LINKINFO, #define IFLA_LINKINFO IFLA_LINKINFO IFLA_NET_NS_PID, + IFLA_IFALIAS, __IFLA_MAX }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 488c56e649b5..d675df08b946 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -471,6 +471,8 @@ struct net_device char name[IFNAMSIZ]; /* device name hash chain */ struct hlist_node name_hlist; + /* snmp alias */ + char *ifalias; /* * I/O specific fields @@ -1224,6 +1226,7 @@ extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); extern int dev_change_name(struct net_device *, char *); +extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); extern int dev_set_mtu(struct net_device *, int); diff --git a/net/core/dev.c b/net/core/dev.c index fdfc4b6a6448..e91390533999 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -953,6 +953,29 @@ rollback: return err; } +/** + * dev_set_alias - change ifalias of a device + * @dev: device + * @alias: name up to IFALIASZ + * + * Set ifalias for a device, + */ +int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +{ + ASSERT_RTNL(); + + if (len >= IFALIASZ) + return -EINVAL; + + dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); + if (!dev->ifalias) + return -ENOMEM; + + strlcpy(dev->ifalias, alias, len+1); + return len; +} + + /** * netdev_features_change - device changes features * @dev: device to cause notification diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c1f4e0d428c0..92d6b9467314 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -209,9 +209,44 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } +static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_device *netdev = to_net_dev(dev); + size_t count = len; + ssize_t ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* ignore trailing newline */ + if (len > 0 && buf[len - 1] == '\n') + --count; + + rtnl_lock(); + ret = dev_set_alias(netdev, buf, count); + rtnl_unlock(); + + return ret < 0 ? ret : len; +} + +static ssize_t show_ifalias(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct net_device *netdev = to_net_dev(dev); + ssize_t ret = 0; + + rtnl_lock(); + if (netdev->ifalias) + ret = sprintf(buf, "%s\n", netdev->ifalias); + rtnl_unlock(); + return ret; +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), + __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), __ATTR(iflink, S_IRUGO, show_iflink, NULL), __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), __ATTR(features, S_IRUGO, show_features, NULL), @@ -418,6 +453,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); + kfree(dev->ifalias); kfree((char *)dev - dev->padded); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 71edb8b36341..8862498fd4a6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -586,6 +586,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) @@ -640,6 +641,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (txq->qdisc_sleeping) NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); + if (dev->ifalias) + NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); + if (1) { struct rtnl_link_ifmap map = { .mem_start = dev->mem_start, @@ -713,6 +717,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -853,6 +858,14 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_IFALIAS]) { + err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); + if (err < 0) + goto errout; + modified = 1; + } + if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); send_addr_notify = 1; -- cgit v1.2.3 From 96ca4a2cc1454cf633a1e0796b7ef39d937b87ec Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 23 Sep 2008 21:23:19 -0700 Subject: net: remove ifalias on empty given alias This patch removes the potentially allocated ifalias when the (new) given alias is empty. E.g. when setting echo "" > /sys/class/net/eth0/ifalias Signed-off-by: Oliver Hartkopp Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index e91390533999..a90737fe2472 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -967,6 +967,14 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) if (len >= IFALIASZ) return -EINVAL; + if (!len) { + if (dev->ifalias) { + kfree(dev->ifalias); + dev->ifalias = NULL; + } + return 0; + } + dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); if (!dev->ifalias) return -ENOMEM; -- cgit v1.2.3 From cf04a4c764cd3e651a64b3e667bb6a673ead99e1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 30 Sep 2008 02:22:14 -0700 Subject: netdev: use const for some name functions dev_change_name and netdev_drivername should use const char on parameters that are read-only input values. The strcpy to newname is not needed since newname is not used later in function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d675df08b946..9cfd20be8b7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1225,7 +1225,7 @@ extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); -extern int dev_change_name(struct net_device *, char *); +extern int dev_change_name(struct net_device *, const char *); extern int dev_set_alias(struct net_device *, const char *, size_t); extern int dev_change_net_namespace(struct net_device *, struct net *, const char *); @@ -1670,7 +1670,7 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern int netdev_class_create_file(struct class_attribute *class_attr); extern void netdev_class_remove_file(struct class_attribute *class_attr); -extern char *netdev_drivername(struct net_device *dev, char *buffer, int len); +extern char *netdev_drivername(const struct net_device *dev, char *buffer, int len); extern void linkwatch_run_queue(void); diff --git a/net/core/dev.c b/net/core/dev.c index a90737fe2472..64f0d5b7cdfc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -890,7 +890,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) * Change name of a device, can pass format strings "eth%d". * for wildcarding. */ -int dev_change_name(struct net_device *dev, char *newname) +int dev_change_name(struct net_device *dev, const char *newname) { char oldname[IFNAMSIZ]; int err = 0; @@ -916,7 +916,6 @@ int dev_change_name(struct net_device *dev, char *newname) err = dev_alloc_name(dev, newname); if (err < 0) return err; - strcpy(newname, dev->name); } else if (__dev_get_by_name(net, newname)) return -EEXIST; @@ -4754,10 +4753,10 @@ err_name: return -ENOMEM; } -char *netdev_drivername(struct net_device *dev, char *buffer, int len) +char *netdev_drivername(const struct net_device *dev, char *buffer, int len) { - struct device_driver *driver; - struct device *parent; + const struct device_driver *driver; + const struct device *parent; if (len <= 0 || !buffer) return buffer; -- cgit v1.2.3 From f0db275a81ef184293ca7ef3646fe065b336efb7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 30 Sep 2008 02:23:58 -0700 Subject: netdev: docbook comment update (revised) Add more docbook comments to network device functions and cleanup the comments. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 64f0d5b7cdfc..2cc258b7acce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -956,6 +956,7 @@ rollback: * dev_set_alias - change ifalias of a device * @dev: device * @alias: name up to IFALIASZ + * @len: limit of bytes to copy from info * * Set ifalias for a device, */ @@ -3330,6 +3331,12 @@ static void dev_addr_discard(struct net_device *dev) netif_addr_unlock_bh(dev); } +/** + * dev_get_flags - get flags reported to userspace + * @dev: device + * + * Get the combination of flag bits exported through APIs to userspace. + */ unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; @@ -3354,6 +3361,14 @@ unsigned dev_get_flags(const struct net_device *dev) return flags; } +/** + * dev_change_flags - change device settings + * @dev: device + * @flags: device state flags + * + * Change settings on device based state flags. The flags are + * in the userspace exported format. + */ int dev_change_flags(struct net_device *dev, unsigned flags) { int ret, changes; @@ -3423,6 +3438,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags) return ret; } +/** + * dev_set_mtu - Change maximum transfer unit + * @dev: device + * @new_mtu: new transfer unit + * + * Change the maximum transfer size of the network device. + */ int dev_set_mtu(struct net_device *dev, int new_mtu) { int err; @@ -3447,6 +3469,13 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return err; } +/** + * dev_set_mac_address - Change Media Access Control Address + * @dev: device + * @sa: new address + * + * Change the hardware (MAC) address of the device + */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { int err; @@ -4350,7 +4379,12 @@ void free_netdev(struct net_device *dev) put_device(&dev->dev); } -/* Synchronize with packet receive processing. */ +/** + * synchronize_net - Synchronize with packet receive processing + * + * Wait for packets currently being received to be done. + * Does not block later packets from starting. + */ void synchronize_net(void) { might_sleep(); @@ -4652,7 +4686,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, } /** - * netdev_dma_regiser - register the networking subsystem as a DMA client + * netdev_dma_register - register the networking subsystem as a DMA client */ static int __init netdev_dma_register(void) { @@ -4753,6 +4787,14 @@ err_name: return -ENOMEM; } +/** + * netdev_drivername - network driver for the device + * @dev: network device + * @buffer: buffer for resulting name + * @len: size of buffer + * + * Determine network driver for device. + */ char *netdev_drivername(const struct net_device *dev, char *buffer, int len) { const struct device_driver *driver; -- cgit v1.2.3 From b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 7 Oct 2008 15:26:48 -0700 Subject: net: only invoke dev->change_rx_flags when device is UP Jesper Dangaard Brouer reported a bug when setting a VLAN device down that is in promiscous mode: When the VLAN device is set down, the promiscous count on the real device is decremented by one by vlan_dev_stop(). When removing the promiscous flag from the VLAN device afterwards, the promiscous count on the real device is decremented a second time by the vlan_change_rx_flags() callback. The root cause for this is that the ->change_rx_flags() callback is invoked while the device is down. The synchronization is meant to mirror the behaviour of the ->set_rx_mode callbacks, meaning the ->open function is responsible for doing a full sync on open, the ->close() function is responsible for doing full cleanup on ->stop() and ->change_rx_flags() is meant to do incremental changes while the device is UP. Only invoke ->change_rx_flags() while the device is UP to provide the intended behaviour. Tested-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index e8eb2b478344..fd992c0f2717 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2918,6 +2918,12 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } +static void dev_change_rx_flags(struct net_device *dev, int flags) +{ + if (dev->flags & IFF_UP && dev->change_rx_flags) + dev->change_rx_flags(dev, flags); +} + static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -2955,8 +2961,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) current->uid, current->gid, audit_get_sessionid(current)); - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_PROMISC); + dev_change_rx_flags(dev, IFF_PROMISC); } return 0; } @@ -3022,8 +3027,7 @@ int dev_set_allmulti(struct net_device *dev, int inc) } } if (dev->flags ^ old_flags) { - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } return 0; @@ -3347,8 +3351,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST) - dev->change_rx_flags(dev, IFF_MULTICAST); + if ((old_flags ^ flags) & IFF_MULTICAST) + dev_change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); -- cgit v1.2.3 From 58ec3b4db9eb5a28e3aec5f407a54e28f7039c19 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 7 Oct 2008 15:50:03 -0700 Subject: net: Fix netdev_run_todo dead-lock Benjamin Thery tracked down a bug that explains many instances of the error unregister_netdevice: waiting for %s to become free. Usage count = %d It turns out that netdev_run_todo can dead-lock with itself if a second instance of it is run in a thread that will then free a reference to the device waited on by the first instance. The problem is really quite silly. We were trying to create parallelism where none was required. As netdev_run_todo always follows a RTNL section, and that todo tasks can only be added with the RTNL held, by definition you should only need to wait for the very ones that you've added and be done with it. There is no need for a second mutex or spinlock. This is exactly what the following patch does. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 27 ++++++--------------------- net/core/rtnetlink.c | 2 +- 2 files changed, 7 insertions(+), 22 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index fd992c0f2717..0ae08d3f57e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3812,14 +3812,11 @@ static int dev_new_index(struct net *net) } /* Delayed registration/unregisteration */ -static DEFINE_SPINLOCK(net_todo_list_lock); static LIST_HEAD(net_todo_list); static void net_set_todo(struct net_device *dev) { - spin_lock(&net_todo_list_lock); list_add_tail(&dev->todo_list, &net_todo_list); - spin_unlock(&net_todo_list_lock); } static void rollback_registered(struct net_device *dev) @@ -4146,33 +4143,24 @@ static void netdev_wait_allrefs(struct net_device *dev) * free_netdev(y1); * free_netdev(y2); * - * We are invoked by rtnl_unlock() after it drops the semaphore. + * We are invoked by rtnl_unlock(). * This allows us to deal with problems: * 1) We can delete sysfs objects which invoke hotplug * without deadlocking with linkwatch via keventd. * 2) Since we run with the RTNL semaphore not held, we can sleep * safely in order to wait for the netdev refcnt to drop to zero. + * + * We must not return until all unregister events added during + * the interval the lock was held have been completed. */ -static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { struct list_head list; - /* Need to guard against multiple cpu's getting out of order. */ - mutex_lock(&net_todo_run_mutex); - - /* Not safe to do outside the semaphore. We must not return - * until all unregister events invoked by the local processor - * have been completed (either by this todo run, or one on - * another cpu). - */ - if (list_empty(&net_todo_list)) - goto out; - /* Snapshot list, allow later requests */ - spin_lock(&net_todo_list_lock); list_replace_init(&net_todo_list, &list); - spin_unlock(&net_todo_list_lock); + + __rtnl_unlock(); while (!list_empty(&list)) { struct net_device *dev @@ -4204,9 +4192,6 @@ void netdev_run_todo(void) /* Free network device */ kobject_put(&dev->dev.kobj); } - -out: - mutex_unlock(&net_todo_run_mutex); } static struct net_device_stats *internal_stats(struct net_device *dev) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 71edb8b36341..d6381c2a4693 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -73,7 +73,7 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { - mutex_unlock(&rtnl_mutex); + /* This fellow will unlock it for us. */ netdev_run_todo(); } -- cgit v1.2.3