diff options
Diffstat (limited to 'net/core/sock.c')
-rw-r--r-- | net/core/sock.c | 136 |
1 files changed, 89 insertions, 47 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index ac2a404c73eb..c0b5b2f17412 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX]; static struct lock_class_key af_elock_keys[AF_MAX]; static struct lock_class_key af_kern_callback_keys[AF_MAX]; -/* Take into consideration the size of the struct sk_buff overhead in the - * determination of these values, since that is non-constant across - * platforms. This makes socket queueing behavior and performance - * not depend upon such differences. - */ -#define _SK_MEM_PACKETS 256 -#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) -#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) -#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) - /* Run time adjustable parameters. */ __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; EXPORT_SYMBOL(sysctl_wmem_max); @@ -1055,6 +1045,20 @@ set_rcvbuf: if (val == 1) dst_negative_advice(sk); break; + + case SO_ZEROCOPY: + if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + ret = -ENOTSUPP; + else if (sk->sk_protocol != IPPROTO_TCP) + ret = -ENOTSUPP; + else if (sk->sk_state != TCP_CLOSE) + ret = -EBUSY; + else if (val < 0 || val > 1) + ret = -EINVAL; + else + sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -1383,6 +1387,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val64 = sock_gen_cookie(sk); break; + case SO_ZEROCOPY: + v.val = sock_flag(sk, SOCK_ZEROCOPY); + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -1461,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { - kmemcheck_annotate_bitfield(sk, flags); - if (security_sk_alloc(sk, family, priority)) goto out_free; @@ -1646,6 +1652,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); + newsk->sk_prot_creator = sk->sk_prot; + /* SANITY */ if (likely(newsk->sk_net_refcnt)) get_net(sock_net(newsk)); @@ -1667,19 +1675,28 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); + cgroup_sk_alloc(&newsk->sk_cgrp_data); - filter = rcu_dereference_protected(newsk->sk_filter, 1); + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); if (filter != NULL) /* though it's an empty new sock, the charging may fail * if sysctl_optmem_max was changed between creation of * original socket and cloning */ is_charged = sk_filter_charge(newsk, filter); + RCU_INIT_POINTER(newsk->sk_filter, filter); + rcu_read_unlock(); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { /* We need to make sure that we don't uncharge the new @@ -1700,9 +1717,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); - mem_cgroup_sk_alloc(newsk); - cgroup_sk_alloc(&newsk->sk_cgrp_data); - /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -1757,7 +1771,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; sk->sk_route_caps &= ~sk->sk_route_nocaps; if (sk_can_gso(sk)) { - if (dst->header_len) { + if (dst->header_len && !xfrm_dst_offload_ok(dst)) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; @@ -1923,6 +1937,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, } EXPORT_SYMBOL(sock_wmalloc); +static void sock_ofree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_omem_alloc); +} + +struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, + gfp_t priority) +{ + struct sk_buff *skb; + + /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ + if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > + sysctl_optmem_max) + return NULL; + + skb = alloc_skb(size, priority); + if (!skb) + return NULL; + + atomic_add(skb->truesize, &sk->sk_omem_alloc); + skb->sk = sk; + skb->destructor = sock_ofree; + return skb; +} + /* * Allocate a memory block from the socket's option memory buffer. */ @@ -2303,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) /* guarantee minimum buffer size under pressure */ if (kind == SK_MEM_RECV) { - if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) + if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) return 1; } else { /* SK_MEM_SEND */ + int wmem0 = sk_get_wmem0(sk, prot); + if (sk->sk_type == SOCK_STREAM) { - if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) + if (sk->sk_wmem_queued < wmem0) return 1; - } else if (refcount_read(&sk->sk_wmem_alloc) < - prot->sysctl_wmem[0]) + } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) { return 1; + } } if (sk_has_memory_pressure(sk)) { @@ -2408,9 +2451,6 @@ EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) { - if (val < 0) - return -EINVAL; - sk->sk_peek_off = val; return 0; } @@ -2500,6 +2540,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) } EXPORT_SYMBOL(sock_no_sendmsg); +int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(sock_no_sendmsg_locked); + int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) { @@ -2528,6 +2574,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz } EXPORT_SYMBOL(sock_no_sendpage); +ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + ssize_t res; + struct msghdr msg = {.msg_flags = flags}; + struct kvec iov; + char *kaddr = kmap(page); + + iov.iov_base = kaddr + offset; + iov.iov_len = size; + res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size); + kunmap(page); + return res; +} +EXPORT_SYMBOL(sock_no_sendpage_locked); + /* * Default Socket Callbacks */ @@ -2623,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk_init_common(sk); sk->sk_send_head = NULL; - init_timer(&sk->sk_timer); + timer_setup(&sk->sk_timer, NULL, 0); sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; @@ -2673,6 +2735,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; + atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; @@ -2681,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0U; sk->sk_pacing_rate = ~0U; + sk->sk_pacing_shift = 10; sk->sk_incoming_cpu = -1; /* * Before updating sk_refcnt, we must commit prior changes to memory @@ -2979,7 +3043,6 @@ struct prot_inuse { static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); -#ifdef CONFIG_NET_NS void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); @@ -3023,27 +3086,6 @@ static __init int net_inuse_init(void) } core_initcall(net_inuse_init); -#else -static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); - -void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) -{ - __this_cpu_add(prot_inuse.val[prot->inuse_idx], val); -} -EXPORT_SYMBOL_GPL(sock_prot_inuse_add); - -int sock_prot_inuse_get(struct net *net, struct proto *prot) -{ - int cpu, idx = prot->inuse_idx; - int res = 0; - - for_each_possible_cpu(cpu) - res += per_cpu(prot_inuse, cpu).val[idx]; - - return res >= 0 ? res : 0; -} -EXPORT_SYMBOL_GPL(sock_prot_inuse_get); -#endif static void assign_proto_idx(struct proto *prot) { |