summaryrefslogtreecommitdiff
path: root/net/ipv4/inet_connection_sock.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-02-11 03:54:23 -0500
committerDavid S. Miller <davem@davemloft.net>2016-02-11 03:54:23 -0500
commitfd1914b2901badd942f008ce57bf4a938d29fde4 (patch)
tree2764c897bbba3f7604ce942dbe62d17352036d58 /net/ipv4/inet_connection_sock.c
parent30c1de08dda9202699c1ddc7fd658693faf93bf2 (diff)
parent4b2a6aed2115cd72faaffc92e03d6516e8113904 (diff)
Merge branch 'tcp-fast-so_reuseport'
Craig Gallek says: ==================== Faster SO_REUSEPORT for TCP This patch series complements an earlier series (6a5ef90c58da) which added faster SO_REUSEPORT lookup for UDP sockets by extending the feature to TCP sockets. It uses the same array-based data structure which allows for socket selection after finding the first listening socket that matches an incoming packet. Prior to this feature, every socket in the reuseport group needed to be found and examined before a selection could be made. With this series the SO_ATTACH_REUSEPORT_CBPF and SO_ATTACH_REUSEPORT_EBPF socket options now work for TCP sockets as well. The test at the end of the series includes an example of how to use these options to select a reuseport socket based on the cpu core id handling the incoming packet. There are several refactoring patches that precede the feature implementation. Only the last two patches in this series should result in any behavioral changes. v4 - Fix build issue when compiling IPv6 as a module. This required moving the ipv6_rcv_saddr_equal into an object that is included as a built-in object. I included this change in the second patch which adds inet6_hash since that is where ipv6_rcv_saddr_equal will later be called from non-module code. v3: - Another warning in the first patch caught by a build bot. Return 0 in the no-op UDP hash function. v2: - In the first patched I missed a couple of hash functions that should now be returning int instead of void. I missed these the first time through as it only generated a warning and not an error :\ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inet_connection_sock.c')
-rw-r--r--net/ipv4/inet_connection_sock.c22
1 files changed, 15 insertions, 7 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 9b17c1792dce..c16a2e6273d9 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,6 +24,7 @@
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
+#include <net/sock_reuseport.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
- (sk2->sk_state != TCP_TIME_WAIT &&
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -132,6 +134,7 @@ again:
sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(tb->fastuid, uid))) &&
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
@@ -193,15 +196,18 @@ tb_found:
if (((tb->fastreuse > 0 &&
sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1) {
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) && smallest_size == -1) {
goto success;
} else {
ret = 1;
if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
smallest_size != -1 && --attempts >= 0) {
spin_unlock(&head->lock);
goto again;
@@ -734,6 +740,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ int err = -EADDRINUSE;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@ -751,13 +758,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
- return 0;
+ if (likely(!err))
+ return 0;
}
sk->sk_state = TCP_CLOSE;
- return -EADDRINUSE;
+ return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);