diff options
author | David S. Miller <davem@davemloft.net> | 2015-05-11 10:50:19 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-05-11 10:50:19 -0400 |
commit | 0198e09c4bdd7bce00c451c51a86a239c356a315 (patch) | |
tree | 0f787a635911a4c854638de7ff36cb93a32ca5aa /net/core/sock.c | |
parent | 80ba92fa1a92dea128283f69f55b02242e213650 (diff) | |
parent | affb9792f1d99e1e4d64411e147b648d65f2576e (diff) |
Merge branch 'kernel_socket_netns'
Eric W. Biederman says:
====================
Cleanup the kernel sockets.
Right now the situation for allocating kernel sockets is a mess.
- sock_create_kern does not take a namespace parameter.
- kernel sockets must not reference count a network namespace and keep
it alive or else we will have a reference counting loop.
- The ways we avoid the reference counting loop, sk_change_net
and sk_release_kernel, are major hacks.
This patchset addresses this mess by fixing sock_create_kern to do
everything necessary to create a kernel socket. None of the current
users of kernel sockets need the network namespace reference counted.
Either kernel sockets are network namespace aware (and using the current
hacks) or kernel sockets are limited to the initial network namespace
in which case it does not matter.
This patchset starts by addressing tun which should be using normal
userspace sockets like macvtap.
Then sock_create_kern is fixed to take a network namespace.
Then the in-kernel status of sockets is passed through to sk_alloc.
Then sk_alloc is fixed to not reference count the network namespace
of kernel sockets.
Then the callers of sock_create_kern are fixed up to stop using hacks.
Then netlink, which uses its own flavor of sock_create_kern, is fixed.
Finally the hacks that are sk_change_net and sk_release_kernel are removed.
When it is all done the code is easier to follow, easier to use, easier
to maintain and shorter by about 70 lines.
====================
Reported-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/sock.c')
-rw-r--r-- | net/core/sock.c | 30 |
1 files changed, 8 insertions, 22 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index e891bcf325ca..c18738a795b0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1396,9 +1396,10 @@ EXPORT_SYMBOL_GPL(sock_update_netprioidx); * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance + * @kern: is this to be a kernel socket? */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot) + struct proto *prot, int kern) { struct sock *sk; @@ -1411,7 +1412,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); - sock_net_set(sk, get_net(net)); + sk->sk_net_refcnt = kern ? 0 : 1; + if (likely(sk->sk_net_refcnt)) + get_net(net); + sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); @@ -1445,7 +1449,8 @@ static void __sk_free(struct sock *sk) if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); - put_net(sock_net(sk)); + if (likely(sk->sk_net_refcnt)) + put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } @@ -1461,25 +1466,6 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); -/* - * Last sock_put should drop reference to sk->sk_net. It has already - * been dropped in sk_change_net. Taking reference to stopping namespace - * is not an option. - * Take reference to a socket to remove it from hash _alive_ and after that - * destroy it in the context of init_net. - */ -void sk_release_kernel(struct sock *sk) -{ - if (sk == NULL || sk->sk_socket == NULL) - return; - - sock_hold(sk); - sock_net_set(sk, get_net(&init_net)); - sock_release(sk->sk_socket); - sock_put(sk); -} -EXPORT_SYMBOL(sk_release_kernel); - static void sk_update_clone(const struct sock *sk, struct sock *newsk) { if (mem_cgroup_sockets_enabled && sk->sk_cgrp) |