summaryrefslogtreecommitdiff
path: root/net/core/sock.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/sock.c')
-rw-r--r--net/core/sock.c136
1 files changed, 89 insertions, 47 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index ac2a404c73eb..c0b5b2f17412 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
-/* Take into consideration the size of the struct sk_buff overhead in the
- * determination of these values, since that is non-constant across
- * platforms. This makes socket queueing behavior and performance
- * not depend upon such differences.
- */
-#define _SK_MEM_PACKETS 256
-#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
-#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
@@ -1055,6 +1045,20 @@ set_rcvbuf:
if (val == 1)
dst_negative_advice(sk);
break;
+
+ case SO_ZEROCOPY:
+ if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ ret = -ENOTSUPP;
+ else if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ else if (sk->sk_state != TCP_CLOSE)
+ ret = -EBUSY;
+ else if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1383,6 +1387,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val64 = sock_gen_cookie(sk);
break;
+ case SO_ZEROCOPY:
+ v.val = sock_flag(sk, SOCK_ZEROCOPY);
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1461,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmalloc(prot->obj_size, priority);
if (sk != NULL) {
- kmemcheck_annotate_bitfield(sk, flags);
-
if (security_sk_alloc(sk, family, priority))
goto out_free;
@@ -1646,6 +1652,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sock_copy(newsk, sk);
+ newsk->sk_prot_creator = sk->sk_prot;
+
/* SANITY */
if (likely(newsk->sk_net_refcnt))
get_net(sock_net(newsk));
@@ -1667,19 +1675,28 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+ atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
- filter = rcu_dereference_protected(newsk->sk_filter, 1);
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
if (filter != NULL)
/* though it's an empty new sock, the charging may fail
* if sysctl_optmem_max was changed between creation of
* original socket and cloning
*/
is_charged = sk_filter_charge(newsk, filter);
+ RCU_INIT_POINTER(newsk->sk_filter, filter);
+ rcu_read_unlock();
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
/* We need to make sure that we don't uncharge the new
@@ -1700,9 +1717,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
- mem_cgroup_sk_alloc(newsk);
- cgroup_sk_alloc(&newsk->sk_cgrp_data);
-
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -1757,7 +1771,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
if (sk_can_gso(sk)) {
- if (dst->header_len) {
+ if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
@@ -1923,6 +1937,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
}
EXPORT_SYMBOL(sock_wmalloc);
+static void sock_ofree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ atomic_sub(skb->truesize, &sk->sk_omem_alloc);
+}
+
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+ gfp_t priority)
+{
+ struct sk_buff *skb;
+
+ /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+ sysctl_optmem_max)
+ return NULL;
+
+ skb = alloc_skb(size, priority);
+ if (!skb)
+ return NULL;
+
+ atomic_add(skb->truesize, &sk->sk_omem_alloc);
+ skb->sk = sk;
+ skb->destructor = sock_ofree;
+ return skb;
+}
+
/*
* Allocate a memory block from the socket's option memory buffer.
*/
@@ -2303,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
/* guarantee minimum buffer size under pressure */
if (kind == SK_MEM_RECV) {
- if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+ if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
return 1;
} else { /* SK_MEM_SEND */
+ int wmem0 = sk_get_wmem0(sk, prot);
+
if (sk->sk_type == SOCK_STREAM) {
- if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+ if (sk->sk_wmem_queued < wmem0)
return 1;
- } else if (refcount_read(&sk->sk_wmem_alloc) <
- prot->sysctl_wmem[0])
+ } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
return 1;
+ }
}
if (sk_has_memory_pressure(sk)) {
@@ -2408,9 +2451,6 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
{
- if (val < 0)
- return -EINVAL;
-
sk->sk_peek_off = val;
return 0;
}
@@ -2500,6 +2540,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
}
EXPORT_SYMBOL(sock_no_sendmsg);
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
+{
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_sendmsg_locked);
+
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
int flags)
{
@@ -2528,6 +2574,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
}
EXPORT_SYMBOL(sock_no_sendpage);
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ ssize_t res;
+ struct msghdr msg = {.msg_flags = flags};
+ struct kvec iov;
+ char *kaddr = kmap(page);
+
+ iov.iov_base = kaddr + offset;
+ iov.iov_len = size;
+ res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
+ kunmap(page);
+ return res;
+}
+EXPORT_SYMBOL(sock_no_sendpage_locked);
+
/*
* Default Socket Callbacks
*/
@@ -2623,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk_init_common(sk);
sk->sk_send_head = NULL;
- init_timer(&sk->sk_timer);
+ timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
@@ -2673,6 +2735,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = SK_DEFAULT_STAMP;
+ atomic_set(&sk->sk_zckey, 0);
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
@@ -2681,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
+ sk->sk_pacing_shift = 10;
sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
@@ -2979,7 +3043,6 @@ struct prot_inuse {
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
-#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
@@ -3023,27 +3086,6 @@ static __init int net_inuse_init(void)
}
core_initcall(net_inuse_init);
-#else
-static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
-
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
-{
- __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
-
-int sock_prot_inuse_get(struct net *net, struct proto *prot)
-{
- int cpu, idx = prot->inuse_idx;
- int res = 0;
-
- for_each_possible_cpu(cpu)
- res += per_cpu(prot_inuse, cpu).val[idx];
-
- return res >= 0 ? res : 0;
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-#endif
static void assign_proto_idx(struct proto *prot)
{