From 5bc1421e34ecfe0bd4b26dc3232b7d5e25179144 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 22 Nov 2011 05:10:51 +0000 Subject: net: add network priority cgroup infrastructure (v4) This patch adds in the infrastructure code to create the network priority cgroup. The cgroup, in addition to the standard processes file, creates two control files: 1) prioidx - This is a read-only file that exports the index of this cgroup. This is a value that is both arbitrary and unique to a cgroup in this subsystem, and is used to index the per-device priority map 2) priomap - This is a writeable file. On read it reports a table of 2-tuples <name, priority> where name is the name of a network interface and priority indicates the priority assigned to frames egressing on the named interface and originating from a pid in this cgroup This cgroup allows for skb priority to be set prior to a root qdisc getting selected. This is beneficial for DCB enabled systems, in that it allows for any application to use dcb configured priorities without application modification Signed-off-by: Neil Horman Signed-off-by: John Fastabend CC: Robert Love CC: "David S. Miller" Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 344 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 net/core/netprio_cgroup.c (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c new file mode 100644 index 000000000000..72ad0bc6841e --- /dev/null +++ b/net/core/netprio_cgroup.c @@ -0,0 +1,344 @@ +/* + * net/core/netprio_cgroup.c Priority Control Group + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Neil Horman + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); + +struct cgroup_subsys net_prio_subsys = { + .name = "net_prio", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, +#ifdef CONFIG_NETPRIO_CGROUP + .subsys_id = net_prio_subsys_id, +#endif + .module = THIS_MODULE +}; + +#define PRIOIDX_SZ 128 + +static unsigned long prioidx_map[PRIOIDX_SZ]; +static DEFINE_SPINLOCK(prioidx_map_lock); +static atomic_t max_prioidx = ATOMIC_INIT(0); + +static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) +{ + return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id), + struct cgroup_netprio_state, css); +} + +static int get_prioidx(u32 *prio) +{ + unsigned long flags; + u32 prioidx; + + spin_lock_irqsave(&prioidx_map_lock, flags); + prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); + set_bit(prioidx, prioidx_map); + spin_unlock_irqrestore(&prioidx_map_lock, flags); + if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) + return -ENOSPC; + + atomic_set(&max_prioidx, prioidx); + *prio = prioidx; + return 0; +} + +static void put_prioidx(u32 idx) +{ + unsigned long flags; + + spin_lock_irqsave(&prioidx_map_lock, flags); + clear_bit(idx, prioidx_map); + spin_unlock_irqrestore(&prioidx_map_lock, flags); +} + +static void extend_netdev_table(struct net_device *dev, u32 new_len) +{ + size_t new_size = sizeof(struct netprio_map) + + ((sizeof(u32) * new_len)); + struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); + struct netprio_map *old_priomap; + int i; + + old_priomap = rtnl_dereference(dev->priomap); + + if (!new_priomap) { + 
printk(KERN_WARNING "Unable to alloc new priomap!\n"); + return; + } + + for (i = 0; + old_priomap && (i < old_priomap->priomap_len); + i++) + new_priomap->priomap[i] = old_priomap->priomap[i]; + + new_priomap->priomap_len = new_len; + + rcu_assign_pointer(dev->priomap, new_priomap); + if (old_priomap) + kfree_rcu(old_priomap, rcu); +} + +static void update_netdev_tables(void) +{ + struct net_device *dev; + u32 max_len = atomic_read(&max_prioidx); + struct netprio_map *map; + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + map = rtnl_dereference(dev->priomap); + if ((!map) || + (map->priomap_len < max_len)) + extend_netdev_table(dev, max_len); + } + rtnl_unlock(); +} + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp) +{ + struct cgroup_netprio_state *cs; + int ret; + + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return ERR_PTR(-ENOMEM); + + if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) { + kfree(cs); + return ERR_PTR(-EINVAL); + } + + ret = get_prioidx(&cs->prioidx); + if (ret != 0) { + printk(KERN_WARNING "No space in priority index array\n"); + kfree(cs); + return ERR_PTR(ret); + } + + return &cs->css; +} + +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + struct cgroup_netprio_state *cs; + struct net_device *dev; + struct netprio_map *map; + + cs = cgrp_netprio_state(cgrp); + rtnl_lock(); + for_each_netdev(&init_net, dev) { + map = rtnl_dereference(dev->priomap); + if (map) + map->priomap[cs->prioidx] = 0; + } + rtnl_unlock(); + put_prioidx(cs->prioidx); + kfree(cs); +} + +static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) +{ + return (u64)cgrp_netprio_state(cgrp)->prioidx; +} + +static int read_priomap(struct cgroup *cont, struct cftype *cft, + struct cgroup_map_cb *cb) +{ + struct net_device *dev; + u32 prioidx = cgrp_netprio_state(cont)->prioidx; + u32 priority; + struct netprio_map *map; + + rcu_read_lock(); + for_each_netdev_rcu(&init_net, 
dev) { + map = rcu_dereference(dev->priomap); + priority = map ? map->priomap[prioidx] : 0; + cb->fill(cb, dev->name, priority); + } + rcu_read_unlock(); + return 0; +} + +static int write_priomap(struct cgroup *cgrp, struct cftype *cft, + const char *buffer) +{ + char *devname = kstrdup(buffer, GFP_KERNEL); + int ret = -EINVAL; + u32 prioidx = cgrp_netprio_state(cgrp)->prioidx; + unsigned long priority; + char *priostr; + struct net_device *dev; + struct netprio_map *map; + + if (!devname) + return -ENOMEM; + + /* + * Minimally sized valid priomap string + */ + if (strlen(devname) < 3) + goto out_free_devname; + + priostr = strstr(devname, " "); + if (!priostr) + goto out_free_devname; + + /* + *Separate the devname from the associated priority + *and advance the priostr poitner to the priority value + */ + *priostr = '\0'; + priostr++; + + /* + * If the priostr points to NULL, we're at the end of the passed + * in string, and its not a valid write + */ + if (*priostr == '\0') + goto out_free_devname; + + ret = kstrtoul(priostr, 10, &priority); + if (ret < 0) + goto out_free_devname; + + ret = -ENODEV; + + dev = dev_get_by_name(&init_net, devname); + if (!dev) + goto out_free_devname; + + update_netdev_tables(); + ret = 0; + rcu_read_lock(); + map = rcu_dereference(dev->priomap); + if (map) + map->priomap[prioidx] = priority; + rcu_read_unlock(); + dev_put(dev); + +out_free_devname: + kfree(devname); + return ret; +} + +static struct cftype ss_files[] = { + { + .name = "prioidx", + .read_u64 = read_prioidx, + }, + { + .name = "ifpriomap", + .read_map = read_priomap, + .write_string = write_priomap, + }, +}; + +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); +} + +static int netprio_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct netprio_map *old; + u32 max_len = atomic_read(&max_prioidx); + + /* + 
* Note this is called with rtnl_lock held so we have update side + * protection on our rcu assignments + */ + + switch (event) { + + case NETDEV_REGISTER: + if (max_len) + extend_netdev_table(dev, max_len); + break; + case NETDEV_UNREGISTER: + old = rtnl_dereference(dev->priomap); + rcu_assign_pointer(dev->priomap, NULL); + if (old) + kfree_rcu(old, rcu); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block netprio_device_notifier = { + .notifier_call = netprio_device_event +}; + +static int __init init_cgroup_netprio(void) +{ + int ret; + + ret = cgroup_load_subsys(&net_prio_subsys); + if (ret) + goto out; +#ifndef CONFIG_NETPRIO_CGROUP + smp_wmb(); + net_prio_subsys_id = net_prio_subsys.subsys_id; +#endif + + register_netdevice_notifier(&netprio_device_notifier); + +out: + return ret; +} + +static void __exit exit_cgroup_netprio(void) +{ + struct netprio_map *old; + struct net_device *dev; + + unregister_netdevice_notifier(&netprio_device_notifier); + + cgroup_unload_subsys(&net_prio_subsys); + +#ifndef CONFIG_NETPRIO_CGROUP + net_prio_subsys_id = -1; + synchronize_rcu(); +#endif + + rtnl_lock(); + for_each_netdev(&init_net, dev) { + old = rtnl_dereference(dev->priomap); + rcu_assign_pointer(dev->priomap, NULL); + if (old) + kfree_rcu(old, rcu); + } + rtnl_unlock(); +} + +module_init(init_cgroup_netprio); +module_exit(exit_cgroup_netprio); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 2cfa5a0471fef43fda0b7bd87e3a5e4dbadb7809 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Nov 2011 07:09:32 +0000 Subject: net: treewide use of RCU_INIT_POINTER rcu_assign_pointer(ptr, NULL) can be safely replaced by RCU_INIT_POINTER(ptr, NULL) (old rcu_assign_pointer() macro was testing the NULL value and could omit the smp_wmb(), but this had to be removed because of compiler warnings) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/broadcom/cnic.c | 6 +++--- drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c | 4 ++-- drivers/net/macvtap.c | 8 ++++---- drivers/net/ppp/pptp.c | 2 +- drivers/net/team/team_mode_activebackup.c | 2 +- drivers/net/wireless/ath/carl9170/main.c | 12 ++++++------ net/core/netprio_cgroup.c | 4 ++-- 9 files changed, 21 insertions(+), 21 deletions(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 83d8cefba8c0..d573169279b7 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -409,7 +409,7 @@ static int bnx2_unregister_cnic(struct net_device *dev) mutex_lock(&bp->cnic_lock); cp->drv_state = 0; bnapi->cnic_present = 0; - rcu_assign_pointer(bp->cnic_ops, NULL); + RCU_INIT_POINTER(bp->cnic_ops, NULL); mutex_unlock(&bp->cnic_lock); synchronize_rcu(); return 0; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 83481e20f144..0cdbb70ef83e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -11587,7 +11587,7 @@ static int bnx2x_unregister_cnic(struct net_device *dev) mutex_lock(&bp->cnic_mutex); cp->drv_state = 0; - rcu_assign_pointer(bp->cnic_ops, NULL); + RCU_INIT_POINTER(bp->cnic_ops, NULL); mutex_unlock(&bp->cnic_mutex); synchronize_rcu(); kfree(bp->cnic_kwq); diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 099f41d99ec0..b336e55e0d80 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -506,7 +506,7 @@ int cnic_unregister_driver(int ulp_type) } read_unlock(&cnic_dev_lock); - rcu_assign_pointer(cnic_ulp_tbl[ulp_type], NULL); + RCU_INIT_POINTER(cnic_ulp_tbl[ulp_type], NULL); 
mutex_unlock(&cnic_lock); synchronize_rcu(); @@ -579,7 +579,7 @@ static int cnic_unregister_device(struct cnic_dev *dev, int ulp_type) } mutex_lock(&cnic_lock); if (rcu_dereference(cp->ulp_ops[ulp_type])) { - rcu_assign_pointer(cp->ulp_ops[ulp_type], NULL); + RCU_INIT_POINTER(cp->ulp_ops[ulp_type], NULL); cnic_put(dev); } else { pr_err("%s: device not registered to this ulp type %d\n", @@ -5134,7 +5134,7 @@ static void cnic_stop_hw(struct cnic_dev *dev) } cnic_shutdown_rings(dev); clear_bit(CNIC_F_CNIC_UP, &dev->flags); - rcu_assign_pointer(cp->ulp_ops[CNIC_ULP_L4], NULL); + RCU_INIT_POINTER(cp->ulp_ops[CNIC_ULP_L4], NULL); synchronize_rcu(); cnic_cm_shutdown(dev); cp->stop_hw(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 90ff1318cc05..7f7882d24bc6 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -1301,7 +1301,7 @@ int cxgb3_offload_activate(struct adapter *adapter) out_free_l2t: t3_free_l2t(L2DATA(dev)); - rcu_assign_pointer(dev->l2opt, NULL); + RCU_INIT_POINTER(dev->l2opt, NULL); out_free: kfree(t); return err; @@ -1329,7 +1329,7 @@ void cxgb3_offload_deactivate(struct adapter *adapter) rcu_read_lock(); d = L2DATA(tdev); rcu_read_unlock(); - rcu_assign_pointer(tdev->l2opt, NULL); + RCU_INIT_POINTER(tdev->l2opt, NULL); call_rcu(&d->rcu_head, clean_l2_data); if (t->nofail_skb) kfree_skb(t->nofail_skb); diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 1b7082d08f33..7c88d136e723 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -145,8 +145,8 @@ static void macvtap_put_queue(struct macvtap_queue *q) if (vlan) { int index = get_slot(vlan, q); - rcu_assign_pointer(vlan->taps[index], NULL); - rcu_assign_pointer(q->vlan, NULL); + RCU_INIT_POINTER(vlan->taps[index], NULL); + RCU_INIT_POINTER(q->vlan, NULL); sock_put(&q->sk); --vlan->numvtaps; } @@ -223,8 +223,8 @@ static void 
macvtap_del_queues(struct net_device *dev) lockdep_is_held(&macvtap_lock)); if (q) { qlist[j++] = q; - rcu_assign_pointer(vlan->taps[i], NULL); - rcu_assign_pointer(q->vlan, NULL); + RCU_INIT_POINTER(vlan->taps[i], NULL); + RCU_INIT_POINTER(q->vlan, NULL); vlan->numvtaps--; } } diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 89f829f5f725..ede899ca0ee6 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -162,7 +162,7 @@ static void del_chan(struct pppox_sock *sock) { spin_lock(&chan_lock); clear_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); - rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], NULL); + RCU_INIT_POINTER(callid_sock[sock->proto.pptp.src_addr.call_id], NULL); spin_unlock(&chan_lock); synchronize_rcu(); } diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c index b34427502b54..f4d960e82e29 100644 --- a/drivers/net/team/team_mode_activebackup.c +++ b/drivers/net/team/team_mode_activebackup.c @@ -56,7 +56,7 @@ drop: static void ab_port_leave(struct team *team, struct team_port *port) { if (ab_priv(team)->active_port == port) - rcu_assign_pointer(ab_priv(team)->active_port, NULL); + RCU_INIT_POINTER(ab_priv(team)->active_port, NULL); } static int ab_active_port_get(struct team *team, void *arg) diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index f06e0695d412..551859214ee9 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -446,7 +446,7 @@ static void carl9170_op_stop(struct ieee80211_hw *hw) mutex_lock(&ar->mutex); if (IS_ACCEPTING_CMD(ar)) { - rcu_assign_pointer(ar->beacon_iter, NULL); + RCU_INIT_POINTER(ar->beacon_iter, NULL); carl9170_led_set_state(ar, 0); @@ -678,7 +678,7 @@ unlock: vif_priv->active = false; bitmap_release_region(&ar->vif_bitmap, vif_id, 0); ar->vifs--; - rcu_assign_pointer(ar->vif_priv[vif_id].vif, NULL); + 
RCU_INIT_POINTER(ar->vif_priv[vif_id].vif, NULL); list_del_rcu(&vif_priv->list); mutex_unlock(&ar->mutex); synchronize_rcu(); @@ -716,7 +716,7 @@ static void carl9170_op_remove_interface(struct ieee80211_hw *hw, WARN_ON(vif_priv->enable_beacon); vif_priv->enable_beacon = false; list_del_rcu(&vif_priv->list); - rcu_assign_pointer(ar->vif_priv[id].vif, NULL); + RCU_INIT_POINTER(ar->vif_priv[id].vif, NULL); if (vif == main_vif) { rcu_read_unlock(); @@ -1258,7 +1258,7 @@ static int carl9170_op_sta_add(struct ieee80211_hw *hw, } for (i = 0; i < CARL9170_NUM_TID; i++) - rcu_assign_pointer(sta_info->agg[i], NULL); + RCU_INIT_POINTER(sta_info->agg[i], NULL); sta_info->ampdu_max_len = 1 << (3 + sta->ht_cap.ampdu_factor); sta_info->ht_sta = true; @@ -1285,7 +1285,7 @@ static int carl9170_op_sta_remove(struct ieee80211_hw *hw, struct carl9170_sta_tid *tid_info; tid_info = rcu_dereference(sta_info->agg[i]); - rcu_assign_pointer(sta_info->agg[i], NULL); + RCU_INIT_POINTER(sta_info->agg[i], NULL); if (!tid_info) continue; @@ -1398,7 +1398,7 @@ static int carl9170_op_ampdu_action(struct ieee80211_hw *hw, spin_unlock_bh(&ar->tx_ampdu_list_lock); } - rcu_assign_pointer(sta_info->agg[tid], NULL); + RCU_INIT_POINTER(sta_info->agg[tid], NULL); rcu_read_unlock(); ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 72ad0bc6841e..3a9fd4826b75 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -285,7 +285,7 @@ static int netprio_device_event(struct notifier_block *unused, break; case NETDEV_UNREGISTER: old = rtnl_dereference(dev->priomap); - rcu_assign_pointer(dev->priomap, NULL); + RCU_INIT_POINTER(dev->priomap, NULL); if (old) kfree_rcu(old, rcu); break; @@ -332,7 +332,7 @@ static void __exit exit_cgroup_netprio(void) rtnl_lock(); for_each_netdev(&init_net, dev) { old = rtnl_dereference(dev->priomap); - rcu_assign_pointer(dev->priomap, NULL); + RCU_INIT_POINTER(dev->priomap, NULL); if (old) 
kfree_rcu(old, rcu); } -- cgit v1.2.3 From 865d9f9f748fdc1943679ea65d9ee1dc55e4a6ae Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 7 Dec 2011 19:17:17 +0000 Subject: net: netprio_cgroup: make net_prio_subsys static net_prio_subsys can be made static this removes the sparse warning it was throwing. Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3a9fd4826b75..ea16c8faf20c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -28,7 +28,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); -struct cgroup_subsys net_prio_subsys = { +static struct cgroup_subsys net_prio_subsys = { .name = "net_prio", .create = cgrp_create, .destroy = cgrp_destroy, -- cgit v1.2.3 From 0221cd51543972782af558c527e4ac58b32049fa Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 9 Dec 2011 13:39:27 -0500 Subject: Revert "net: netprio_cgroup: make net_prio_subsys static" This reverts commit 865d9f9f748fdc1943679ea65d9ee1dc55e4a6ae. This commit breaks the build with CONFIG_NETPRIO_CGROUP=y so revert it. It does build as a module though. The SUBSYS macro in the cgroup core code automatically defines a subsys structure as extern. Long term we should fix the macro. And I need to fully build test things. Tested with CONFIG_NETPRIO_CGROUP={y|m|n} with and without CONFIG_CGROUPS defined. Signed-off-by: John Fastabend CC: Neil Horman Reported-By: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/netprio_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index ea16c8faf20c..3a9fd4826b75 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -28,7 +28,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); -static struct cgroup_subsys net_prio_subsys = { +struct cgroup_subsys net_prio_subsys = { .name = "net_prio", .create = cgrp_create, .destroy = cgrp_destroy, -- cgit v1.2.3 From 5962b35c1de3254a2f03b95efd3b7854b874d7b7 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 3 Feb 2012 05:18:43 +0000 Subject: netprio_cgroup: Fix obo in get_prioidx It was recently pointed out to me that the get_prioidx function sets a bit in the prioidx map prior to checking to see if the index being set is out of bounds. This patch corrects that, avoiding the possibility of us writing beyond the end of the array Signed-off-by: Neil Horman Reported-by: Stanislaw Gruszka CC: Stanislaw Gruszka CC: "David S. Miller" Signed-off-by: David S.
Miller --- net/core/netprio_cgroup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3a9fd4826b75..9ae183a9a381 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -58,11 +58,12 @@ static int get_prioidx(u32 *prio) spin_lock_irqsave(&prioidx_map_lock, flags); prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); + if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) { + spin_unlock_irqrestore(&prioidx_map_lock, flags); + return -ENOSPC; + } set_bit(prioidx, prioidx_map); spin_unlock_irqrestore(&prioidx_map_lock, flags); - if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) - return -ENOSPC; - atomic_set(&max_prioidx, prioidx); *prio = prioidx; return 0; -- cgit v1.2.3 From a87dfe14a78501c931a4d5481efff6a809aa907d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 10 Feb 2012 05:43:36 +0000 Subject: netprio_cgroup: fix an off-by-one bug # mount -t cgroup xxx /mnt # mkdir /mnt/tmp # cat /mnt/tmp/net_prio.ifpriomap lo 0 eth0 0 virbr0 0 # echo 'lo 999' > /mnt/tmp/net_prio.ifpriomap # cat /mnt/tmp/net_prio.ifpriomap lo 999 eth0 0 virbr0 4101267344 We got weird output, because we exceeded the boundary of the array. We may even crash the kernel.. Origionally-authored-by: Li Zefan Signed-off-by: Li Zefan Signed-off-by: Neil Horman CC: "David S. Miller" Signed-off-by: David S.
Miller --- net/core/netprio_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 9ae183a9a381..72c638780805 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -108,7 +108,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len) static void update_netdev_tables(void) { struct net_device *dev; - u32 max_len = atomic_read(&max_prioidx); + u32 max_len = atomic_read(&max_prioidx) + 1; struct netprio_map *map; rtnl_lock(); -- cgit v1.2.3 From f5c38208d32412d72b97a4f0d44af0eb39feb20b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 10 Feb 2012 05:43:37 +0000 Subject: netprio_cgroup: don't allocate prio table when a device is registered So we delay the allocation till the priority is set through cgroup, and this makes skb_update_priority() faster when it's not set. This also eliminates an off-by-one bug similar to the one fixed in the previous patch. Origionally-authored-by: Li Zefan Signed-off-by: Li Zefan Signed-off-by: Neil Horman CC: "David S. Miller" Signed-off-by: David S.
Miller --- net/core/netprio_cgroup.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 72c638780805..4dacc44637ef 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -271,7 +271,6 @@ static int netprio_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; struct netprio_map *old; - u32 max_len = atomic_read(&max_prioidx); /* * Note this is called with rtnl_lock held so we have update side @@ -279,11 +278,6 @@ static int netprio_device_event(struct notifier_block *unused, */ switch (event) { - - case NETDEV_REGISTER: - if (max_len) - extend_netdev_table(dev, max_len); - break; case NETDEV_UNREGISTER: old = rtnl_dereference(dev->priomap); RCU_INIT_POINTER(dev->priomap, NULL); -- cgit v1.2.3