Diffstat (limited to 'net/core')
-rw-r--r--   net/core/datagram.c     |   2
-rw-r--r--   net/core/dev.c          |  65
-rw-r--r--   net/core/ethtool.c      |   4
-rw-r--r--   net/core/filter.c       | 108
-rw-r--r--   net/core/request_sock.c |   4
-rw-r--r--   net/core/skbuff.c       |  34
-rw-r--r--   net/core/sock.c         |  11
-rw-r--r--   net/core/timestamping.c |   2
8 files changed, 144 insertions(+), 86 deletions(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cd1e039c8755..18ac112ea7ae 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
 	 * interrupt level will suddenly eat the receive_queue.
 	 *
 	 * Look at current nfs client by the way...
-	 * However, this function was corrent in any case. 8)
+	 * However, this function was correct in any case. 8)
 	 */
 	unsigned long cpu_flags;

diff --git a/net/core/dev.c b/net/core/dev.c
index cd2437495428..d28b3a023bb2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 EXPORT_SYMBOL(dev_get_by_index);

 /**
- *	dev_getbyhwaddr - find a device by its hardware address
+ *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
- *	is not found or a pointer to the device. The caller must hold the
- *	rtnl semaphore. The returned device has not had its ref count increased
+ *	is not found or a pointer to the device. The caller must hold RCU
+ *	The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 *
- *	BUGS:
- *	If the API was consistent this would be __dev_get_by_hwaddr
 */
-struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *ha)
 {
 	struct net_device *dev;

-	ASSERT_RTNL();
-
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
 		if (dev->type == type &&
 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 			return dev;

 	return NULL;
 }
-EXPORT_SYMBOL(dev_getbyhwaddr);
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);

 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
@@ -2025,9 +2022,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;

 	if (likely(!skb->next)) {
-		if (!list_empty(&ptype_all))
-			dev_queue_xmit_nit(skb, dev);
-
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -2035,6 +2029,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);

+		if (!list_empty(&ptype_all))
+			dev_queue_xmit_nit(skb, dev);
+
 		skb_orphan_try(skb);

 		if (vlan_tx_tag_present(skb) &&
@@ -5041,10 +5038,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 	}

 	if (features & NETIF_F_UFO) {
-		if (!(features & NETIF_F_GEN_CSUM)) {
+		/* maybe split UFO into V4 and V6? */
+		if (!((features & NETIF_F_GEN_CSUM) ||
+		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
 			if (name)
 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no NETIF_F_HW_CSUM feature.\n",
+				       "since no checksum offload features.\n",
 				       name);
 			features &= ~NETIF_F_UFO;
 		}
@@ -5109,11 +5109,21 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 }
 #endif

+static void netdev_init_one_queue(struct net_device *dev,
+				  struct netdev_queue *queue, void *_unused)
+{
+	/* Initialize queue lock */
+	spin_lock_init(&queue->_xmit_lock);
+	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+	queue->xmit_lock_owner = -1;
+	netdev_queue_numa_node_write(queue, -1);
+	queue->dev = dev;
+}
+
 static int netif_alloc_netdev_queues(struct net_device *dev)
 {
 	unsigned int count = dev->num_tx_queues;
 	struct netdev_queue *tx;
-	int i;

 	BUG_ON(count < 1);

@@ -5125,27 +5135,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	}
 	dev->_tx = tx;

-	for (i = 0; i < count; i++) {
-		netdev_queue_numa_node_write(&tx[i], -1);
-		tx[i].dev = dev;
-	}
-	return 0;
-}
-
-static void netdev_init_one_queue(struct net_device *dev,
-				  struct netdev_queue *queue,
-				  void *_unused)
-{
-	/* Initialize queue lock */
-	spin_lock_init(&queue->_xmit_lock);
-	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
-	queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queues(struct net_device *dev)
-{
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
 	spin_lock_init(&dev->tx_global_lock);
+
+	return 0;
 }

 /**
@@ -5184,8 +5177,6 @@ int register_netdevice(struct net_device *dev)

 	dev->iflink = -1;

-	netdev_init_queues(dev);
-
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
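The dev_getbyhwaddr_rcu() conversion above trades the RTNL requirement for an RCU read-side section; the returned pointer is only stable inside that section and carries no reference. A minimal sketch of caller code under those rules — find_and_hold() is a hypothetical helper, not part of this patch:

	#include <linux/netdevice.h>
	#include <linux/rcupdate.h>

	/* Hypothetical caller: look up a device by MAC without RTNL.
	 * The pointer from dev_getbyhwaddr_rcu() is only valid inside
	 * the rcu_read_lock() section, so grab a reference before
	 * leaving it.
	 */
	static struct net_device *find_and_hold(struct net *net,
						unsigned short type,
						const char *ha)
	{
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_getbyhwaddr_rcu(net, type, ha);
		if (dev)
			dev_hold(dev);	/* caller must dev_put() later */
		rcu_read_unlock();
		return dev;
	}
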
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 956a9f4971cb..d5bc28818883 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1171,7 +1171,9 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 	if (edata.data && !(dev->features & NETIF_F_SG))
 		return -EINVAL;
-	if (edata.data && !(dev->features & NETIF_F_HW_CSUM))
+	if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) ||
+		(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+			== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
 		return -EINVAL;
 	return dev->ethtool_ops->set_ufo(dev, edata.data);
 }
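The netdev_fix_features() hunk above and this ethtool_set_ufo() hunk encode the same rule: UFO is acceptable when the device offers a generic checksum offload, or the IPv4 and IPv6 checksum offloads together. A standalone model of that predicate, with illustrative flag values rather than the kernel's NETIF_F_* constants:

	#include <stdbool.h>

	#define F_GEN_CSUM	(1UL << 0)	/* stands in for NETIF_F_GEN_CSUM */
	#define F_IP_CSUM	(1UL << 1)	/* stands in for NETIF_F_IP_CSUM */
	#define F_IPV6_CSUM	(1UL << 2)	/* stands in for NETIF_F_IPV6_CSUM */

	/* UFO needs checksum offload covering both address families:
	 * either the generic offload, or the v4 and v6 variants together.
	 */
	static bool ufo_csum_ok(unsigned long features)
	{
		return (features & F_GEN_CSUM) ||
		       (features & (F_IP_CSUM | F_IPV6_CSUM)) ==
				(F_IP_CSUM | F_IPV6_CSUM);
	}
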
diff --git a/net/core/filter.c b/net/core/filter.c
index a44d27f9f0f0..e8a6ac411ffb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -88,7 +88,7 @@ enum {
 };

 /* No hurry in this branch */
-static void *__load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
 {
 	u8 *ptr = NULL;

@@ -97,12 +97,12 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	else if (k >= SKF_LL_OFF)
 		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

-	if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
+	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
 		return ptr;
 	return NULL;
 }

-static inline void *load_pointer(struct sk_buff *skb, int k,
+static inline void *load_pointer(const struct sk_buff *skb, int k,
 				 unsigned int size, void *buffer)
 {
 	if (k >= 0)
@@ -110,7 +110,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
 	else {
 		if (k >= SKF_AD_OFF)
 			return NULL;
-		return __load_pointer(skb, k);
+		return __load_pointer(skb, k, size);
 	}
 }

@@ -160,17 +160,16 @@ EXPORT_SYMBOL(sk_filter);
 *	and last instruction guaranteed to be a RET, we dont need to check
 *	flen. (We used to pass to this function the length of filter)
 */
-unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
+unsigned int sk_run_filter(const struct sk_buff *skb,
+			   const struct sock_filter *fentry)
 {
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
 	u32 X = 0;			/* Index Register */
 	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
-	unsigned long memvalid = 0;
 	u32 tmp;
 	int k;

-	BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
 	/*
 	 * Process array of filter instructions.
 	 */
@@ -318,12 +317,10 @@ load_b:
 			X = K;
 			continue;
 		case BPF_S_LD_MEM:
-			A = (memvalid & (1UL << K)) ?
-				mem[K] : 0;
+			A = mem[K];
 			continue;
 		case BPF_S_LDX_MEM:
-			X = (memvalid & (1UL << K)) ?
-				mem[K] : 0;
+			X = mem[K];
 			continue;
 		case BPF_S_MISC_TAX:
 			X = A;
@@ -336,11 +333,9 @@ load_b:
 		case BPF_S_RET_A:
 			return A;
 		case BPF_S_ST:
-			memvalid |= 1UL << K;
 			mem[K] = A;
 			continue;
 		case BPF_S_STX:
-			memvalid |= 1UL << K;
 			mem[K] = X;
 			continue;
 		default:
@@ -375,6 +370,12 @@ load_b:
 				return 0;
 			A = skb->dev->type;
 			continue;
+		case SKF_AD_RXHASH:
+			A = skb->rxhash;
+			continue;
+		case SKF_AD_CPU:
+			A = raw_smp_processor_id();
+			continue;
 		case SKF_AD_NLATTR: {
 			struct nlattr *nla;

@@ -419,6 +420,66 @@ load_b:
 }
 EXPORT_SYMBOL(sk_run_filter);

+/*
+ * Security :
+ * A BPF program is able to use 16 cells of memory to store intermediate
+ * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
+ * As we dont want to clear mem[] array for each packet going through
+ * sk_run_filter(), we check that filter loaded by user never try to read
+ * a cell if not previously written, and we check all branches to be sure
+ * a malicious user doesnt try to abuse us.
+ */
+static int check_load_and_stores(struct sock_filter *filter, int flen)
+{
+	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+	int pc, ret = 0;
+
+	BUILD_BUG_ON(BPF_MEMWORDS > 16);
+	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		return -ENOMEM;
+	memset(masks, 0xff, flen * sizeof(*masks));
+
+	for (pc = 0; pc < flen; pc++) {
+		memvalid &= masks[pc];
+
+		switch (filter[pc].code) {
+		case BPF_S_ST:
+		case BPF_S_STX:
+			memvalid |= (1 << filter[pc].k);
+			break;
+		case BPF_S_LD_MEM:
+		case BPF_S_LDX_MEM:
+			if (!(memvalid & (1 << filter[pc].k))) {
+				ret = -EINVAL;
+				goto error;
+			}
+			break;
+		case BPF_S_JMP_JA:
+			/* a jump must set masks on target */
+			masks[pc + 1 + filter[pc].k] &= memvalid;
+			memvalid = ~0;
+			break;
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
+			/* a jump must set masks on targets */
+			masks[pc + 1 + filter[pc].jt] &= memvalid;
+			masks[pc + 1 + filter[pc].jf] &= memvalid;
+			memvalid = ~0;
+			break;
+		}
+	}
+error:
+	kfree(masks);
+	return ret;
+}
+
 /**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
@@ -547,30 +608,23 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 	switch (filter[flen - 1].code) {
 	case BPF_S_RET_K:
 	case BPF_S_RET_A:
-		return 0;
+		return check_load_and_stores(filter, flen);
 	}
 	return -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);

 /**
- *	sk_filter_rcu_release - Release a socket filter by rcu_head
+ *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
-static void sk_filter_rcu_release(struct rcu_head *rcu)
+void sk_filter_release_rcu(struct rcu_head *rcu)
 {
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

-	sk_filter_release(fp);
-}
-
-static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
-{
-	unsigned int size = sk_filter_len(fp);
-
-	atomic_sub(size, &sk->sk_omem_alloc);
-	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+	kfree(fp);
 }
+EXPORT_SYMBOL(sk_filter_release_rcu);

 /**
 *	sk_attach_filter - attach a socket filter
@@ -614,7 +668,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	rcu_assign_pointer(sk->sk_filter, fp);

 	if (old_fp)
-		sk_filter_delayed_uncharge(sk, old_fp);
+		sk_filter_uncharge(sk, old_fp);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -628,7 +682,7 @@ int sk_detach_filter(struct sock *sk)
 					   sock_owned_by_user(sk));
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
-		sk_filter_delayed_uncharge(sk, filter);
+		sk_filter_uncharge(sk, filter);
 		ret = 0;
 	}
 	return ret;
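check_load_and_stores() is a one-pass static verifier: every path reaching a BPF_S_LD_MEM/BPF_S_LDX_MEM must first pass a store to the same scratch cell, which is what lets the per-packet memvalid bookkeeping disappear from sk_run_filter(). From userspace the effect shows up at attach time. A minimal sketch (Linux-only; the AF_INET datagram socket is an arbitrary choice):

	#include <stdio.h>
	#include <string.h>
	#include <errno.h>
	#include <sys/socket.h>
	#include <linux/filter.h>

	int main(void)
	{
		/* Loads scratch cell 3 without any prior store: a kernel
		 * with check_load_and_stores() refuses this at attach time.
		 */
		struct sock_filter insns[] = {
			BPF_STMT(BPF_LD | BPF_MEM, 3),
			BPF_STMT(BPF_RET | BPF_K, 0xffff),
		};
		struct sock_fprog prog = {
			.len = sizeof(insns) / sizeof(insns[0]),
			.filter = insns,
		};
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			       &prog, sizeof(prog)) < 0)
			printf("attach refused: %s\n", strerror(errno));
		else
			printf("attach accepted (pre-check kernel)\n");
		return 0;
	}
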
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 41d99435f62d..182236b2510a 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -46,9 +46,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
 	lopt_size += nr_table_entries * sizeof(struct request_sock *);
 	if (lopt_size > PAGE_SIZE)
-		lopt = __vmalloc(lopt_size,
-			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
-			PAGE_KERNEL);
+		lopt = vzalloc(lopt_size);
 	else
 		lopt = kzalloc(lopt_size, GFP_KERNEL);
 	if (lopt == NULL)
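vzalloc(size) is shorthand for __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL), so the hunk above changes readability, not behavior. The surrounding pattern, sketched as a hypothetical pair of helpers (not in the patch), also shows the constraint it carries: the free side must match the allocation side:

	#include <linux/mm.h>
	#include <linux/slab.h>
	#include <linux/vmalloc.h>

	/* Hypothetical helpers mirroring reqsk_queue_alloc(): small tables
	 * come from the slab allocator, big ones from vmalloc space, both
	 * zero-filled.
	 */
	static void *table_zalloc(size_t size)
	{
		if (size > PAGE_SIZE)
			return vzalloc(size);
		return kzalloc(size, GFP_KERNEL);
	}

	static void table_free(void *p, size_t size)
	{
		if (size > PAGE_SIZE)
			vfree(p);
		else
			kfree(p);
	}
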
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f8444754a..8814a9a52f47 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -778,6 +778,28 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,

 	size = SKB_DATA_ALIGN(size);

+	/* Check if we can avoid taking references on fragments if we own
+	 * the last reference on skb->head. (see skb_release_data())
+	 */
+	if (!skb->cloned)
+		fastpath = true;
+	else {
+		int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+
+		fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+	}
+
+	if (fastpath &&
+	    size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
+		memmove(skb->head + size, skb_shinfo(skb),
+			offsetof(struct skb_shared_info,
+				 frags[skb_shinfo(skb)->nr_frags]));
+		memmove(skb->head + nhead, skb->head,
+			skb_tail_pointer(skb) - skb->head);
+		off = nhead;
+		goto adjust_others;
+	}
+
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
@@ -791,17 +813,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	       skb_shinfo(skb),
 	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));

-	/* Check if we can avoid taking references on fragments if we own
-	 * the last reference on skb->head. (see skb_release_data())
-	 */
-	if (!skb->cloned)
-		fastpath = true;
-	else {
-		int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-
-		fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
-	}
-
 	if (fastpath) {
 		kfree(skb->head);
 	} else {
@@ -816,6 +827,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	off = (data + nhead) - skb->head;

 	skb->head     = data;
+adjust_others:
 	skb->data    += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->end      = size;
diff --git a/net/core/sock.c b/net/core/sock.c
index fb6080111461..bcdb6ff6e621 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -992,17 +992,18 @@ static inline void sock_lock_init(struct sock *sk)
 /*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarly, because of RCU lookups. sk_node should also be left as is.
+ * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
 #ifdef CONFIG_SECURITY_NETWORK
 	void *sptr = nsk->sk_security;
 #endif
-	BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
-		     sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
-		     sizeof(osk->sk_tx_queue_mapping));
-	memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
-	       osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
+	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
+
+	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
 #ifdef CONFIG_SECURITY_NETWORK
 	nsk->sk_security = sptr;
 	security_sk_clone(osk, nsk);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index dac7ed687f60..b124d28ff1c8 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -26,7 +26,7 @@ static struct sock_filter ptp_filter[] = {
 	PTP_FILTER
 };

-static unsigned int classify(struct sk_buff *skb)
+static unsigned int classify(const struct sk_buff *skb)
 {
 	if (likely(skb->dev &&
 		   skb->dev->phydev &&
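The new sock_copy() relies on struct sock placing its RCU-sensitive fields (sk_node, sk_refcnt, ...) between two markers, sk_dontcopy_begin and sk_dontcopy_end, so the copy can simply skip that window. The idea in miniature, as a standalone sketch with a toy struct (the obj/obj_copy names are illustrative; zero-length arrays are a GNU C extension, as in the kernel):

	#include <stddef.h>
	#include <string.h>

	/* Fields between the two zero-size markers survive the copy
	 * untouched, the way sk_refcnt and sk_node must in sock_copy().
	 */
	struct obj {
		int a;
		char dontcopy_begin[0];
		int refcnt;		/* not copied */
		char dontcopy_end[0];
		int b;
	};

	static void obj_copy(struct obj *nobj, const struct obj *oobj,
			     size_t obj_size)
	{
		memcpy(nobj, oobj, offsetof(struct obj, dontcopy_begin));
		memcpy(&nobj->dontcopy_end, &oobj->dontcopy_end,
		       obj_size - offsetof(struct obj, dontcopy_end));
	}
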