From b761c9b1f4f69eb53fb6147547a1ab25237a93b3 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 4 Jul 2012 23:28:40 +0000 Subject: cgroup: fix panic in netprio_cgroup we set max_prioidx to the first zero bit index of prioidx_map in function get_prioidx. So when we delete the low index netprio cgroup and adding a new netprio cgroup again,the max_prioidx will be set to the low index. when we set the high index cgroup's net_prio.ifpriomap,the function write_priomap will call update_netdev_tables to alloc memory which size is sizeof(struct netprio_map) + sizeof(u32) * (max_prioidx + 1), so the size of array that map->priomap point to is max_prioidx +1, which is low than what we actually need. fix this by adding check in get_prioidx,only set max_prioidx when max_prioidx low than the new prioidx. Signed-off-by: Gao feng Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5b8aa2fae48b..aa907ed466ea 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -49,8 +49,9 @@ static int get_prioidx(u32 *prio) return -ENOSPC; } set_bit(prioidx, prioidx_map); + if (atomic_read(&max_prioidx) < prioidx) + atomic_set(&max_prioidx, prioidx); spin_unlock_irqrestore(&prioidx_map_lock, flags); - atomic_set(&max_prioidx, prioidx); *prio = prioidx; return 0; } -- cgit v1.2.3 From 91c68ce2b26319248a32d7baa1226f819d283758 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Jul 2012 21:45:10 +0000 Subject: net: cgroup: fix out of bounds accesses dev->priomap is allocated by extend_netdev_table() called from update_netdev_tables(). And this is only called if write_priomap() is called. But if write_priomap() is not called, it seems we can have out of bounds accesses in cgrp_destroy(), read_priomap() & skb_update_prio() With help from Gao Feng Signed-off-by: Eric Dumazet Cc: Neil Horman Cc: Gao feng Acked-by: Gao feng Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- net/core/netprio_cgroup.c | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/dev.c b/net/core/dev.c index 84f01ba81a34..0f28a9e0b8ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2444,8 +2444,12 @@ static void skb_update_prio(struct sk_buff *skb) { struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); - if ((!skb->priority) && (skb->sk) && map) - skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx]; + if (!skb->priority && skb->sk && map) { + unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + + if (prioidx < map->priomap_len) + skb->priority = map->priomap[prioidx]; + } } #else #define skb_update_prio(skb) diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index aa907ed466ea..3e953eaddbfc 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -142,7 +142,7 @@ static void cgrp_destroy(struct cgroup *cgrp) rtnl_lock(); for_each_netdev(&init_net, dev) { map = rtnl_dereference(dev->priomap); - if (map) + if (map && cs->prioidx < map->priomap_len) map->priomap[cs->prioidx] = 0; } rtnl_unlock(); @@ -166,7 +166,7 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft, rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { map = rcu_dereference(dev->priomap); - priority = map ? map->priomap[prioidx] : 0; + priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0; cb->fill(cb, dev->name, priority); } rcu_read_unlock(); -- cgit v1.2.3 From 0f307323a48e47f064aa38e87f6fa03c88b551fc Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Sat, 7 Jul 2012 10:51:56 +0800 Subject: netprio_cgroup.c: fix comment typo poitner -> pointer. Signed-off-by: Liu Bo Signed-off-by: Jiri Kosina --- net/core/netprio_cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 5b8aa2fae48b..e13ad4946565 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -198,7 +198,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, /* *Separate the devname from the associated priority - *and advance the priostr poitner to the priority value + *and advance the priostr pointer to the priority value */ *priostr = '\0'; priostr++; -- cgit v1.2.3 From ef209f15980360f6945873df3cd710c5f62f2a3e Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 11 Jul 2012 21:50:15 +0000 Subject: net: cgroup: fix access the unallocated memory in netprio cgroup there are some out of bound accesses in netprio cgroup. now before accessing the dev->priomap.priomap array,we only check if the dev->priomap exist.and because we don't want to see additional bound checkings in fast path, so we should make sure that dev->priomap is null or array size of dev->priomap.priomap is equal to max_prioidx + 1; so in write_priomap logic,we should call extend_netdev_table when dev->priomap is null and dev->priomap.priomap_len < max_len. and in cgrp_create->update_netdev_tables logic,we should call extend_netdev_table only when dev->priomap exist and dev->priomap.priomap_len < max_len. and it's not needed to call update_netdev_tables in write_priomap, we can only allocate the net device's priomap which we change through net_prio.ifpriomap. this patch also add a return value for update_netdev_tables & extend_netdev_table, so when new_priomap is allocated failed, write_priomap will stop to access the priomap,and return -ENOMEM back to the userspace to tell the user what happend. Change From v3: 1. add rtnl protect when reading max_prioidx in write_priomap. 2. only call extend_netdev_table when map->priomap_len < max_len, this will make sure array size of dev->map->priomap always bigger than any prioidx. 3. add a function write_update_netdev_table to make codes clear. Change From v2: 1. protect extend_netdev_table by RTNL. 2. when extend_netdev_table failed,call dev_put to reduce device's refcount. Signed-off-by: Gao feng Cc: Neil Horman Cc: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netprio_cgroup.c | 71 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 17 deletions(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3e953eaddbfc..b2e9caa1ad1a 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -65,7 +65,7 @@ static void put_prioidx(u32 idx) spin_unlock_irqrestore(&prioidx_map_lock, flags); } -static void extend_netdev_table(struct net_device *dev, u32 new_len) +static int extend_netdev_table(struct net_device *dev, u32 new_len) { size_t new_size = sizeof(struct netprio_map) + ((sizeof(u32) * new_len)); @@ -77,7 +77,7 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len) if (!new_priomap) { pr_warn("Unable to alloc new priomap!\n"); - return; + return -ENOMEM; } for (i = 0; @@ -90,46 +90,79 @@ static void extend_netdev_table(struct net_device *dev, u32 new_len) rcu_assign_pointer(dev->priomap, new_priomap); if (old_priomap) kfree_rcu(old_priomap, rcu); + return 0; } -static void update_netdev_tables(void) +static int write_update_netdev_table(struct net_device *dev) { + int ret = 0; + u32 max_len; + struct netprio_map *map; + + rtnl_lock(); + max_len = atomic_read(&max_prioidx) + 1; + map = rtnl_dereference(dev->priomap); + if (!map || map->priomap_len < max_len) + ret = extend_netdev_table(dev, max_len); + rtnl_unlock(); + + return ret; +} + +static int update_netdev_tables(void) +{ + int ret = 0; struct net_device *dev; - u32 max_len = atomic_read(&max_prioidx) + 1; + u32 max_len; struct netprio_map *map; rtnl_lock(); + max_len = atomic_read(&max_prioidx) + 1; for_each_netdev(&init_net, dev) { map = rtnl_dereference(dev->priomap); - if ((!map) || - (map->priomap_len < max_len)) - extend_netdev_table(dev, max_len); + /* + * don't allocate priomap if we didn't + * change net_prio.ifpriomap (map == NULL), + * this will speed up skb_update_prio. + */ + if (map && map->priomap_len < max_len) { + ret = extend_netdev_table(dev, max_len); + if (ret < 0) + break; + } } rtnl_unlock(); + return ret; } static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; - int ret; + int ret = -EINVAL; cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); - if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) { - kfree(cs); - return ERR_PTR(-EINVAL); - } + if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) + goto out; ret = get_prioidx(&cs->prioidx); - if (ret != 0) { + if (ret < 0) { pr_warn("No space in priority index array\n"); - kfree(cs); - return ERR_PTR(ret); + goto out; + } + + ret = update_netdev_tables(); + if (ret < 0) { + put_prioidx(cs->prioidx); + goto out; } return &cs->css; +out: + kfree(cs); + return ERR_PTR(ret); } static void cgrp_destroy(struct cgroup *cgrp) @@ -221,13 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, if (!dev) goto out_free_devname; - update_netdev_tables(); - ret = 0; + ret = write_update_netdev_table(dev); + if (ret < 0) + goto out_put_dev; + rcu_read_lock(); map = rcu_dereference(dev->priomap); if (map) map->priomap[prioidx] = priority; rcu_read_unlock(); + +out_put_dev: dev_put(dev); out_free_devname: -- cgit v1.2.3 From 406a3c638ce8b17d9704052c07955490f732c2b8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 20 Jul 2012 10:39:25 +0000 Subject: net: netprio_cgroup: rework update socket logic Instead of updating the sk_cgrp_prioidx struct field on every send this only updates the field when a task is moved via cgroup infrastructure. This allows sockets that may be used by a kernel worker thread to be managed. For example in the iscsi case today a user can put iscsid in a netprio cgroup and control traffic will be sent with the correct sk_cgrp_prioidx value set but as soon as data is sent the kernel worker thread isssues a send and sk_cgrp_prioidx is updated with the kernel worker threads value which is the default case. It seems more correct to only update the field when the user explicitly sets it via control group infrastructure. This allows the users to manage sockets that may be used with other threads. Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/net.h | 1 + include/net/netprio_cgroup.h | 4 ++-- net/core/netprio_cgroup.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ net/core/sock.c | 6 ++--- net/socket.c | 5 ++--- 5 files changed, 61 insertions(+), 8 deletions(-) (limited to 'net/core/netprio_cgroup.c') diff --git a/include/linux/net.h b/include/linux/net.h index dc95700de5df..99276c3dc89a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -248,6 +248,7 @@ extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); +extern struct socket *sock_from_file(struct file *file, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index d58fdec47597..2719dec6b5a8 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -35,7 +35,7 @@ struct cgroup_netprio_state { extern int net_prio_subsys_id; #endif -extern void sock_update_netprioidx(struct sock *sk); +extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) @@ -82,7 +82,7 @@ static inline u32 task_netprioidx(struct task_struct *p) #endif /* CONFIG_NETPRIO_CGROUP */ #else -#define sock_update_netprioidx(sk) +#define sock_update_netprioidx(sk, task) #endif #endif /* _NET_CLS_CGROUP_H */ diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index b2e9caa1ad1a..63d15e8f80e9 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -25,6 +25,8 @@ #include #include +#include + #define PRIOIDX_SZ 128 static unsigned long prioidx_map[PRIOIDX_SZ]; @@ -272,6 +274,56 @@ out_free_devname: return ret; } +void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +{ + struct task_struct *p; + char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); + + if (!tmp) { + pr_warn("Unable to attach cgrp due to alloc failure!\n"); + return; + } + + cgroup_taskset_for_each(p, cgrp, tset) { + unsigned int fd; + struct fdtable *fdt; + struct files_struct *files; + + task_lock(p); + files = p->files; + if (!files) { + task_unlock(p); + continue; + } + + rcu_read_lock(); + fdt = files_fdtable(files); + for (fd = 0; fd < fdt->max_fds; fd++) { + char *path; + struct file *file; + struct socket *sock; + unsigned long s; + int rv, err = 0; + + file = fcheck_files(files, fd); + if (!file) + continue; + + path = d_path(&file->f_path, tmp, PAGE_SIZE); + rv = sscanf(path, "socket:[%lu]", &s); + if (rv <= 0) + continue; + + sock = sock_from_file(file, &err); + if (!err) + sock_update_netprioidx(sock->sk, p); + } + rcu_read_unlock(); + task_unlock(p); + } + kfree(tmp); +} + static struct cftype ss_files[] = { { .name = "prioidx", @@ -289,6 +341,7 @@ struct cgroup_subsys net_prio_subsys = { .name = "net_prio", .create = cgrp_create, .destroy = cgrp_destroy, + .attach = net_prio_attach, #ifdef CONFIG_NETPRIO_CGROUP .subsys_id = net_prio_subsys_id, #endif diff --git a/net/core/sock.c b/net/core/sock.c index 24039ac12426..2676a88f533e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1180,12 +1180,12 @@ void sock_update_classid(struct sock *sk) } EXPORT_SYMBOL(sock_update_classid); -void sock_update_netprioidx(struct sock *sk) +void sock_update_netprioidx(struct sock *sk, struct task_struct *task) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(current); + sk->sk_cgrp_prioidx = task_netprioidx(task); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif @@ -1215,7 +1215,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); - sock_update_netprioidx(sk); + sock_update_netprioidx(sk, current); } return sk; diff --git a/net/socket.c b/net/socket.c index 0452dca4cd24..dfe5b66c97e0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -398,7 +398,7 @@ int sock_map_fd(struct socket *sock, int flags) } EXPORT_SYMBOL(sock_map_fd); -static struct socket *sock_from_file(struct file *file, int *err) +struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ @@ -406,6 +406,7 @@ static struct socket *sock_from_file(struct file *file, int *err) *err = -ENOTSOCK; return NULL; } +EXPORT_SYMBOL(sock_from_file); /** * sockfd_lookup - Go from a file number to its socket slot @@ -554,8 +555,6 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, sock_update_classid(sock->sk); - sock_update_netprioidx(sock->sk); - si->sock = sock; si->scm = NULL; si->msg = msg; -- cgit v1.2.3