summaryrefslogtreecommitdiff
path: root/include/net/inet_hashtables.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/inet_hashtables.h')
-rw-r--r--include/net/inet_hashtables.h145
1 files changed, 84 insertions, 61 deletions
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index f72ec113ae56..a0887b70967b 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -90,11 +90,32 @@ struct inet_bind_bucket {
struct hlist_head owners;
};
+struct inet_bind2_bucket {
+ possible_net_t ib_net;
+ int l3mdev;
+ unsigned short port;
+ union {
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr v6_rcv_saddr;
+#endif
+ __be32 rcv_saddr;
+ };
+ /* Node in the inet2_bind_hashbucket chain */
+ struct hlist_node node;
+ /* List of sockets hashed to this bucket */
+ struct hlist_head owners;
+};
+
static inline struct net *ib_net(struct inet_bind_bucket *ib)
{
return read_pnet(&ib->ib_net);
}
+static inline struct net *ib2_net(struct inet_bind2_bucket *ib)
+{
+ return read_pnet(&ib->ib_net);
+}
+
#define inet_bind_bucket_for_each(tb, head) \
hlist_for_each_entry(tb, head, node)
@@ -103,6 +124,15 @@ struct inet_bind_hashbucket {
struct hlist_head chain;
};
+/* This is synchronized using the inet_bind_hashbucket's spinlock.
+ * Instead of having separate spinlocks, the inet_bind2_hashbucket can share
+ * the inet_bind_hashbucket's given that in every case where the bhash2 table
+ * is useful, a lookup in the bhash table also occurs.
+ */
+struct inet_bind2_hashbucket {
+ struct hlist_head chain;
+};
+
/* Sockets can be hashed in established or listening table.
* We must use different 'nulls' end-of-chain value for all hash buckets :
* A socket might transition from ESTABLISH to LISTEN state without
@@ -111,11 +141,7 @@ struct inet_bind_hashbucket {
#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
spinlock_t lock;
- unsigned int count;
- union {
- struct hlist_head head;
- struct hlist_nulls_head nulls_head;
- };
+ struct hlist_nulls_head nulls_head;
};
/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -138,37 +164,19 @@ struct inet_hashinfo {
*/
struct kmem_cache *bind_bucket_cachep;
struct inet_bind_hashbucket *bhash;
+ /* The 2nd binding table hashed by port and address.
+ * This is used primarily for expediting the resolution of bind
+ * conflicts.
+ */
+ struct kmem_cache *bind2_bucket_cachep;
+ struct inet_bind2_hashbucket *bhash2;
unsigned int bhash_size;
/* The 2nd listener table hashed by local port and address */
unsigned int lhash2_mask;
struct inet_listen_hashbucket *lhash2;
-
- /* All the above members are written once at bootup and
- * never written again _or_ are predominantly read-access.
- *
- * Now align to a new cache line as all the following members
- * might be often dirty.
- */
- /* All sockets in TCP_LISTEN state will be in listening_hash.
- * This is the only table where wildcard'd TCP sockets can
- * exist. listening_hash is only hashed by local port number.
- * If lhash2 is initialized, the same socket will also be hashed
- * to lhash2 by port and address.
- */
- struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE]
- ____cacheline_aligned_in_smp;
};
-#define inet_lhash2_for_each_icsk_continue(__icsk) \
- hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)
-
-#define inet_lhash2_for_each_icsk(__icsk, list) \
- hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)
-
-#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
- hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
-
static inline struct inet_listen_hashbucket *
inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash)
{
@@ -221,32 +229,50 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
-static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
- const u32 bhash_size)
+static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb,
+ struct net *net,
+ const unsigned short port,
+ int l3mdev)
{
- return (lport + net_hash_mix(net)) & (bhash_size - 1);
+ return net_eq(ib_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev;
}
-void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- const unsigned short snum);
+struct inet_bind2_bucket *
+inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
+ struct inet_bind2_hashbucket *head,
+ const unsigned short port, int l3mdev,
+ const struct sock *sk);
-/* These can have wildcards, don't try too hard. */
-static inline u32 inet_lhashfn(const struct net *net, const unsigned short num)
-{
- return (num + net_hash_mix(net)) & (INET_LHTABLE_SIZE - 1);
-}
+void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
+ struct inet_bind2_bucket *tb);
+
+struct inet_bind2_bucket *
+inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
+ const unsigned short port, int l3mdev,
+ struct sock *sk,
+ struct inet_bind2_hashbucket **head);
-static inline int inet_sk_listen_hashfn(const struct sock *sk)
+bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
+ struct net *net,
+ const unsigned short port,
+ int l3mdev,
+ const struct sock *sk);
+
+static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
+ const u32 bhash_size)
{
- return inet_lhashfn(sock_net(sk), inet_sk(sk)->inet_num);
+ return (lport + net_hash_mix(net)) & (bhash_size - 1);
}
+void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
+ struct inet_bind2_bucket *tb2, const unsigned short snum);
+
/* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);
void inet_put_port(struct sock *sk);
-void inet_hashinfo_init(struct inet_hashinfo *h);
void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
unsigned long numentries, int scale,
unsigned long low_limit,
@@ -295,7 +321,6 @@ static inline struct sock *inet_lookup_listener(struct net *net,
((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
#endif
-#if (BITS_PER_LONG == 64)
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
const __addrpair __name = (__force __addrpair) ( \
@@ -307,24 +332,22 @@ static inline struct sock *inet_lookup_listener(struct net *net,
(((__force __u64)(__be32)(__daddr)) << 32) | \
((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
- (((__sk)->sk_portpair == (__ports)) && \
- ((__sk)->sk_addrpair == (__cookie)) && \
- (((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
- net_eq(sock_net(__sk), (__net)))
-#else /* 32-bit arch */
-#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
- const int __name __deprecated __attribute__((unused))
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
- (((__sk)->sk_portpair == (__ports)) && \
- ((__sk)->sk_daddr == (__saddr)) && \
- ((__sk)->sk_rcv_saddr == (__daddr)) && \
- (((__sk)->sk_bound_dev_if == (__dif)) || \
- ((__sk)->sk_bound_dev_if == (__sdif))) && \
- net_eq(sock_net(__sk), (__net)))
-#endif /* 64-bit arch */
+static inline bool inet_match(struct net *net, const struct sock *sk,
+ const __addrpair cookie, const __portpair ports,
+ int dif, int sdif)
+{
+ int bound_dev_if;
+
+ if (!net_eq(sock_net(sk), net) ||
+ sk->sk_portpair != ports ||
+ sk->sk_addrpair != cookie)
+ return false;
+
+ /* Paired with WRITE_ONCE() from sock_bindtoindex_locked() */
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ return bound_dev_if == dif || bound_dev_if == sdif;
+}
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
@@ -425,7 +448,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
}
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16,
struct inet_timewait_sock **));