From fb3cfe6e75b9d05c87265e85e67d7caf6e5b44a7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:30 +0200 Subject: inet: frag: remove hash size assumptions from callers hide actual hash size from individual users: The _find function will now fold the given hash value into the required range. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net/ipv4/inet_fragment.c') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 3b01959bf4bb..930d23870811 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -46,6 +46,12 @@ const u8 ip_frag_ecn_table[16] = { }; EXPORT_SYMBOL(ip_frag_ecn_table); +static unsigned int +inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) +{ + return f->hashfn(q) & (INETFRAGS_HASHSZ - 1); +} + static void inet_frag_secret_rebuild(unsigned long dummy) { struct inet_frags *f = (struct inet_frags *)dummy; @@ -63,7 +69,7 @@ static void inet_frag_secret_rebuild(unsigned long dummy) hb = &f->hash[i]; hlist_for_each_entry_safe(q, n, &hb->chain, list) { - unsigned int hval = f->hashfn(q); + unsigned int hval = inet_frag_hashfn(f, q); if (hval != i) { struct inet_frag_bucket *hb_dest; @@ -133,7 +139,7 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) unsigned int hash; read_lock(&f->lock); - hash = f->hashfn(fq); + hash = inet_frag_hashfn(f, fq); hb = &f->hash[hash]; spin_lock(&hb->chain_lock); @@ -252,7 +258,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, * the rnd seed, so we need to re-calculate the hash * chain. Fortunatelly the qp_in can be used to get one. */ - hash = f->hashfn(qp_in); + hash = inet_frag_hashfn(f, qp_in); hb = &f->hash[hash]; spin_lock(&hb->chain_lock); @@ -326,6 +332,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; int depth = 0; + hash &= (INETFRAGS_HASHSZ - 1); hb = &f->hash[hash]; spin_lock(&hb->chain_lock); -- cgit v1.2.3 From 86e93e470cadedda9181a2bd9aee1d9d2e5e9c0f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:31 +0200 Subject: inet: frag: move evictor calls into frag_find function First step to move eviction handling into a work queue. We lose two spots that accounted evicted fragments in MIB counters. Accounting will be restored since the upcoming work-queue evictor invokes the frag queue timer callbacks instead. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 1 - net/ieee802154/reassembly.c | 2 -- net/ipv4/inet_fragment.c | 16 +++++++++------- net/ipv4/ip_fragment.c | 15 --------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ---- net/ipv6/reassembly.c | 6 ------ 6 files changed, 9 insertions(+), 35 deletions(-) (limited to 'net/ipv4/inet_fragment.c') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 15033057d44e..9fe644d1a26e 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -90,7 +90,6 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); -int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock); diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index a707995fd4d7..9503a48556f7 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -369,8 +369,6 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) if (frag_info->d_size > ieee802154_lowpan->max_dsize) goto err; - inet_frag_evictor(&ieee802154_lowpan->frags, &lowpan_frags, false); - fq = fq_find(net, frag_info, &source, &dest); if (fq != NULL) { int ret; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 930d23870811..535636017534 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -46,6 +46,8 @@ const u8 ip_frag_ecn_table[16] = { }; EXPORT_SYMBOL(ip_frag_ecn_table); +static int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); + static unsigned int inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) { @@ -203,16 +205,11 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, } EXPORT_SYMBOL(inet_frag_destroy); -int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) +static int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) { struct inet_frag_queue *q; int work, evicted = 0; - if (!force) { - if (frag_mem_limit(nf) <= nf->high_thresh) - return 0; - } - work = frag_mem_limit(nf) - nf->low_thresh; while (work > 0 || force) { spin_lock(&nf->lru_lock); @@ -242,7 +239,6 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) return evicted; } -EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, @@ -296,6 +292,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, { struct inet_frag_queue *q; + if (frag_mem_limit(nf) > nf->high_thresh) + return NULL; + q = kzalloc(f->qsize, GFP_ATOMIC); if (q == NULL) return NULL; @@ -332,6 +331,9 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; int depth = 0; + if (frag_mem_limit(nf) > nf->high_thresh) + inet_frag_evictor(nf, f, false); + hash &= (INETFRAGS_HASHSZ - 1); hb = &f->hash[hash]; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b769eb6c83c0..54988672d00d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -177,18 +177,6 @@ static void ipq_kill(struct ipq *ipq) inet_frag_kill(&ipq->q, &ip4_frags); } -/* Memory limiting on fragments. 
Evictor trashes the oldest - * fragment queue until we are back under the threshold. - */ -static void ip_evictor(struct net *net) -{ - int evicted; - - evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false); - if (evicted) - IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); -} - /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ @@ -655,9 +643,6 @@ int ip_defrag(struct sk_buff *skb, u32 user) net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); - /* Start by cleaning up the memory. */ - ip_evictor(net); - /* Lookup (or create) queue header */ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { int ret; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 607e4a94ef41..58e32cf91c95 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -594,10 +594,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - local_bh_disable(); - inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); - local_bh_enable(); - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq == NULL) { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2b76549a1016..97acbc490d9e 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -519,7 +519,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); - int evicted; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; @@ -548,11 +547,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); - if (evicted) - IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_REASMFAILS, evicted); - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq != NULL) { -- cgit v1.2.3 From b13d3cbfb8e8a8f53930af67d1ebf05149f32c24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:32 +0200 Subject: inet: frag: move eviction of queues to work queue When the high_thresh limit is reached we try to toss the 'oldest' incomplete fragment queues until memory limits are below the low_thresh value. This happens in softirq/packet processing context. This has two drawbacks: 1) processors might evict a queue that was about to be completed by another cpu, because they will compete wrt. resource usage and resource reclaim. 2) LRU list maintenance is expensive. But when constantly overloaded, even the 'least recently used' element is recent, so removing 'lru' queue first is not 'fairer' than removing any other fragment queue. This moves eviction out of the fast path: When the low threshold is reached, a work queue is scheduled which then iterates over the table and removes the queues that exceed the memory limits of the namespace. It sets a new flag called INET_FRAG_EVICTED on the evicted queues so the proper counters will get incremented when the queue is forcefully expired. When the high threshold is reached, no more fragment queues are created until we're below the limit again. The LRU list is now unused and will be removed in a followup patch. Joint work with Nikolay Aleksandrov. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 4 +- include/net/inet_frag.h | 6 +- net/ipv4/inet_fragment.c | 142 +++++++++++++++++++++++---------- net/ipv4/ip_fragment.c | 3 +- net/ipv6/reassembly.c | 4 +- 5 files changed, 112 insertions(+), 47 deletions(-) (limited to 'net/ipv4/inet_fragment.c') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index f35bfe43bf7a..625c8dda4be7 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -104,7 +104,9 @@ ipfrag_high_thresh - INTEGER is reached. ipfrag_low_thresh - INTEGER - See ipfrag_high_thresh + Maximum memory used to reassemble IP fragments before the kernel + begins to remove incomplete fragment queues to free up resources. + The kernel still accepts new fragments for defragmentation. ipfrag_time - INTEGER Time in seconds to keep an IP fragment in memory. diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 9fe644d1a26e..e975032ea11b 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -32,6 +32,7 @@ struct inet_frag_queue { int meat; __u8 last_in; /* first/last segment arrived? */ +#define INET_FRAG_EVICTED 8 #define INET_FRAG_COMPLETE 4 #define INET_FRAG_FIRST_IN 2 #define INET_FRAG_LAST_IN 1 @@ -48,7 +49,7 @@ struct inet_frag_queue { * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or * struct frag_queue)) */ -#define INETFRAGS_MAXDEPTH 128 +#define INETFRAGS_MAXDEPTH 128 struct inet_frag_bucket { struct hlist_head chain; @@ -65,6 +66,9 @@ struct inet_frags { int secret_interval; struct timer_list secret_timer; + struct work_struct frags_work; + unsigned int next_bucket; + /* The first call to hashfn is responsible to initialize * rnd. This is best done with net_get_random_once. */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 535636017534..43315ecb9400 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -25,6 +25,9 @@ #include #include +#define INETFRAGS_EVICT_BUCKETS 128 +#define INETFRAGS_EVICT_MAX 512 + /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. 
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field @@ -46,8 +49,6 @@ const u8 ip_frag_ecn_table[16] = { }; EXPORT_SYMBOL(ip_frag_ecn_table); -static int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); - static unsigned int inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) { @@ -89,10 +90,92 @@ static void inet_frag_secret_rebuild(unsigned long dummy) mod_timer(&f->secret_timer, now + f->secret_interval); } +static bool inet_fragq_should_evict(const struct inet_frag_queue *q) +{ + return q->net->low_thresh == 0 || + frag_mem_limit(q->net) >= q->net->low_thresh; +} + +static unsigned int +inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) +{ + struct inet_frag_queue *fq; + struct hlist_node *n; + unsigned int evicted = 0; + HLIST_HEAD(expired); + +evict_again: + spin_lock(&hb->chain_lock); + + hlist_for_each_entry_safe(fq, n, &hb->chain, list) { + if (!inet_fragq_should_evict(fq)) + continue; + + if (!del_timer(&fq->timer)) { + /* q expiring right now thus increment its refcount so + * it won't be freed under us and wait until the timer + * has finished executing then destroy it + */ + atomic_inc(&fq->refcnt); + spin_unlock(&hb->chain_lock); + del_timer_sync(&fq->timer); + WARN_ON(atomic_read(&fq->refcnt) != 1); + inet_frag_put(fq, f); + goto evict_again; + } + + /* suppress xmit of (icmp) error packet */ + fq->last_in &= ~INET_FRAG_FIRST_IN; + fq->last_in |= INET_FRAG_EVICTED; + hlist_del(&fq->list); + hlist_add_head(&fq->list, &expired); + ++evicted; + } + + spin_unlock(&hb->chain_lock); + + hlist_for_each_entry_safe(fq, n, &expired, list) + f->frag_expire((unsigned long) fq); + + return evicted; +} + +static void inet_frag_worker(struct work_struct *work) +{ + unsigned int budget = INETFRAGS_EVICT_BUCKETS; + unsigned int i, evicted = 0; + struct inet_frags *f; + + f = container_of(work, struct inet_frags, frags_work); + + BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); + + read_lock_bh(&f->lock); + + for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { + evicted += inet_evict_bucket(f, &f->hash[i]); + i = (i + 1) & (INETFRAGS_HASHSZ - 1); + if (evicted > INETFRAGS_EVICT_MAX) + break; + } + + f->next_bucket = i; + + read_unlock_bh(&f->lock); +} + +static void inet_frag_schedule_worker(struct inet_frags *f) +{ + if (unlikely(!work_pending(&f->frags_work))) + schedule_work(&f->frags_work); +} + void inet_frags_init(struct inet_frags *f) { int i; + INIT_WORK(&f->frags_work, inet_frag_worker); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_bucket *hb = &f->hash[i]; @@ -120,16 +203,22 @@ EXPORT_SYMBOL(inet_frags_init_net); void inet_frags_fini(struct inet_frags *f) { del_timer(&f->secret_timer); + cancel_work_sync(&f->frags_work); } EXPORT_SYMBOL(inet_frags_fini); void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) { + int i; + nf->low_thresh = 0; - local_bh_disable(); - inet_frag_evictor(nf, f, true); - local_bh_enable(); + read_lock_bh(&f->lock); + + for (i = 0; i < INETFRAGS_HASHSZ ; i++) + inet_evict_bucket(f, &f->hash[i]); + + read_unlock_bh(&f->lock); percpu_counter_destroy(&nf->mem); } @@ -205,41 +294,6 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, } EXPORT_SYMBOL(inet_frag_destroy); -static int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) -{ - struct inet_frag_queue *q; - int work, evicted = 0; - - work = frag_mem_limit(nf) - nf->low_thresh; - while (work > 0 || force) { - 
spin_lock(&nf->lru_lock); - - if (list_empty(&nf->lru_list)) { - spin_unlock(&nf->lru_lock); - break; - } - - q = list_first_entry(&nf->lru_list, - struct inet_frag_queue, lru_list); - atomic_inc(&q->refcnt); - /* Remove q from list to avoid several CPUs grabbing it */ - list_del_init(&q->lru_list); - - spin_unlock(&nf->lru_lock); - - spin_lock(&q->lock); - if (!(q->last_in & INET_FRAG_COMPLETE)) - inet_frag_kill(q, f); - spin_unlock(&q->lock); - - if (atomic_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f, &work); - evicted++; - } - - return evicted; -} - static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, void *arg) @@ -292,8 +346,10 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, { struct inet_frag_queue *q; - if (frag_mem_limit(nf) > nf->high_thresh) + if (frag_mem_limit(nf) > nf->high_thresh) { + inet_frag_schedule_worker(f); return NULL; + } q = kzalloc(f->qsize, GFP_ATOMIC); if (q == NULL) @@ -331,8 +387,8 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; int depth = 0; - if (frag_mem_limit(nf) > nf->high_thresh) - inet_frag_evictor(nf, f, false); + if (frag_mem_limit(nf) > nf->low_thresh) + inet_frag_schedule_worker(f); hash &= (INETFRAGS_HASHSZ - 1); hb = &f->hash[hash]; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 54988672d00d..54bd170c5eb4 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -195,7 +195,8 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); - IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + if (!(qp->q.last_in & INET_FRAG_EVICTED)) + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 97acbc490d9e..b3924b10dff3 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -141,7 +141,9 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, if (!dev) goto out_rcu_unlock; - IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + if (!(fq->q.last_in & INET_FRAG_EVICTED)) + IP6_INC_STATS_BH(net, __in6_dev_get(dev), + IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); /* Don't send error if the first segment did not arrive. */ -- cgit v1.2.3 From 434d305405ab86414f6ea3f261307d443a2c3506 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:33 +0200 Subject: inet: frag: don't account number of fragment queues The 'nqueues' counter is protected by the lru list lock, once thats removed this needs to be converted to atomic counter. Given this isn't used for anything except for reporting it to userspace via /proc, just remove it. We still report the memory currently used by fragment reassembly queues. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 3 --- include/net/ip.h | 1 - include/net/ipv6.h | 5 ----- net/ipv4/inet_fragment.c | 1 - net/ipv4/ip_fragment.c | 5 ----- net/ipv4/proc.c | 5 +++-- net/ipv6/proc.c | 4 ++-- 7 files changed, 5 insertions(+), 19 deletions(-) (limited to 'net/ipv4/inet_fragment.c') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e975032ea11b..68de33765705 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -4,7 +4,6 @@ #include struct netns_frags { - int nqueues; struct list_head lru_list; spinlock_t lru_lock; @@ -158,7 +157,6 @@ static inline void inet_frag_lru_del(struct inet_frag_queue *q) { spin_lock(&q->net->lru_lock); list_del_init(&q->lru_list); - q->net->nqueues--; spin_unlock(&q->net->lru_lock); } @@ -167,7 +165,6 @@ static inline void inet_frag_lru_add(struct netns_frags *nf, { spin_lock(&nf->lru_lock); list_add_tail(&q->lru_list, &nf->lru_list); - q->net->nqueues++; spin_unlock(&nf->lru_lock); } diff --git a/include/net/ip.h b/include/net/ip.h index 2e8f055989c3..ca14799545fd 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -495,7 +495,6 @@ static inline struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) } #endif int ip_frag_mem(struct net *net); -int ip_frag_nqueues(struct net *net); /* * Functions provided by ip_forward.c diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 25c2170e1298..a2db816e8461 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -299,11 +299,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev) } #if IS_ENABLED(CONFIG_IPV6) -static inline int ip6_frag_nqueues(struct net *net) -{ - return net->ipv6.frags.nqueues; -} - static inline int ip6_frag_mem(struct net *net) { return sum_frag_mem_limit(&net->ipv6.frags); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 43315ecb9400..231ca0b40811 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -193,7 +193,6 @@ EXPORT_SYMBOL(inet_frags_init); void inet_frags_init_net(struct netns_frags *nf) { - nf->nqueues = 0; init_frag_mem_limit(nf); INIT_LIST_HEAD(&nf->lru_list); spin_lock_init(&nf->lru_lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 54bd170c5eb4..1f42c2e3966b 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -86,11 +86,6 @@ static inline u8 ip4_frag_ecn(u8 tos) static struct inet_frags ip4_frags; -int ip_frag_nqueues(struct net *net) -{ - return net->ipv4.frags.nqueues; -} - int ip_frag_mem(struct net *net) { return sum_frag_mem_limit(&net->ipv4.frags); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ae0af9386f7c..8e3eb39f84e7 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -52,6 +52,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; + unsigned int frag_mem; int orphans, sockets; local_bh_disable(); @@ -71,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); - seq_printf(seq, "FRAG: inuse %d memory %d\n", - ip_frag_nqueues(net), ip_frag_mem(net)); + frag_mem = ip_frag_mem(net); + seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem); return 0; } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 3317440ea341..2d6f860e5c1e 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -33,6 +33,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; + unsigned int frag_mem = 
ip6_frag_mem(net); seq_printf(seq, "TCP6: inuse %d\n", sock_prot_inuse_get(net, &tcpv6_prot)); @@ -42,8 +43,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); - seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues(net), ip6_frag_mem(net)); + seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem); return 0; } -- cgit v1.2.3 From 3fd588eb90bfbba17091381006ecafe29c45db4a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:34 +0200 Subject: inet: frag: remove lru list no longer used. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 32 ++------------------------------ net/ieee802154/reassembly.c | 1 - net/ipv4/inet_fragment.c | 12 ++---------- net/ipv4/ip_fragment.c | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 1 - net/ipv6/reassembly.c | 1 - 6 files changed, 4 insertions(+), 44 deletions(-) (limited to 'net/ipv4/inet_fragment.c') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 68de33765705..90d21ea62c59 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -4,9 +4,6 @@ #include struct netns_frags { - struct list_head lru_list; - spinlock_t lru_lock; - /* The percpu_counter "mem" need to be cacheline aligned. * mem.count must not share cacheline with other writers */ @@ -21,7 +18,6 @@ struct netns_frags { struct inet_frag_queue { spinlock_t lock; struct timer_list timer; /* when will this queue expire? */ - struct list_head lru_list; /* lru list member */ struct hlist_node list; atomic_t refcnt; struct sk_buff *fragments; /* list of received fragments */ @@ -91,8 +87,7 @@ void inet_frags_init_net(struct netns_frags *nf); void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); -void inet_frag_destroy(struct inet_frag_queue *q, - struct inet_frags *f, int *work); +void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock); @@ -102,7 +97,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { if (atomic_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f, NULL); + inet_frag_destroy(q, f); } /* Memory Tracking Functions. */ @@ -145,29 +140,6 @@ static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) return res; } -static inline void inet_frag_lru_move(struct inet_frag_queue *q) -{ - spin_lock(&q->net->lru_lock); - if (!list_empty(&q->lru_list)) - list_move_tail(&q->lru_list, &q->net->lru_list); - spin_unlock(&q->net->lru_lock); -} - -static inline void inet_frag_lru_del(struct inet_frag_queue *q) -{ - spin_lock(&q->net->lru_lock); - list_del_init(&q->lru_list); - spin_unlock(&q->net->lru_lock); -} - -static inline void inet_frag_lru_add(struct netns_frags *nf, - struct inet_frag_queue *q) -{ - spin_lock(&nf->lru_lock); - list_add_tail(&q->lru_list, &nf->lru_list); - spin_unlock(&nf->lru_lock); -} - /* RFC 3168 support : * We want to check ECN values of all fragments, do detect invalid combinations. * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. 
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index 9503a48556f7..b4bc7a50eccf 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -219,7 +219,6 @@ found: return res; } - inet_frag_lru_move(&fq->q); return -1; err: kfree_skb(skb); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 231ca0b40811..198a5ed7a815 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -194,8 +194,6 @@ EXPORT_SYMBOL(inet_frags_init); void inet_frags_init_net(struct netns_frags *nf) { init_frag_mem_limit(nf); - INIT_LIST_HEAD(&nf->lru_list); - spin_lock_init(&nf->lru_lock); } EXPORT_SYMBOL(inet_frags_init_net); @@ -237,7 +235,6 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) spin_unlock(&hb->chain_lock); read_unlock(&f->lock); - inet_frag_lru_del(fq); } void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) @@ -261,8 +258,7 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, kfree_skb(skb); } -void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, - int *work) +void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) { struct sk_buff *fp; struct netns_frags *nf; @@ -282,14 +278,11 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, fp = xp; } sum = sum_truesize + f->qsize; - if (work) - *work -= sum; sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); kfree(q); - } EXPORT_SYMBOL(inet_frag_destroy); @@ -333,7 +326,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); - inet_frag_lru_add(nf, qp); + spin_unlock(&hb->chain_lock); read_unlock(&f->lock); @@ -361,7 +354,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); - INIT_LIST_HEAD(&q->lru_list); return q; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1f42c2e3966b..8fbeee495037 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -489,7 +489,6 @@ found: } skb_dst_drop(skb); - inet_frag_lru_move(&qp->q); return -EINPROGRESS; err: diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 58e32cf91c95..fb0f72a0ff31 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -349,7 +349,6 @@ found: fq->q.last_in |= INET_FRAG_FIRST_IN; } - inet_frag_lru_move(&fq->q); return 0; discard_fq: diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index b3924b10dff3..af85551682c2 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -351,7 +351,6 @@ found: } skb_dst_drop(skb); - inet_frag_lru_move(&fq->q); return -1; discard_fq: -- cgit v1.2.3 From e3a57d18b06179d68fcf7a0a06ad844493c65e06 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:35 +0200 Subject: inet: frag: remove periodic secret rebuild timer merge functionality into the eviction workqueue. Instead of rebuilding every n seconds, take advantage of the upper hash chain length limit. If we hit it, mark table for rebuild and schedule workqueue. To prevent frequent rebuilds when we're completely overloaded, don't rebuild more than once every 5 seconds. 
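The pieces implementing that trigger are scattered across the diff further below; gathered in one place and slightly condensed (not a literal copy of the patch), the rebuild logic looks like this:

	/* don't rebuild inetfrag table with new secret more often than this */
	#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)

	static bool inet_frag_may_rebuild(struct inet_frags *f)
	{
		return time_after(jiffies,
				  f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
	}

	/* lookup path (inet_frag_find), once a chain exceeds INETFRAGS_MAXDEPTH: */
	if (inet_frag_may_rebuild(f)) {
		f->rebuild = true;
		inet_frag_schedule_worker(f);
	}

	/* end of the eviction worker (inet_frag_worker): */
	if (f->rebuild && inet_frag_may_rebuild(f))
		inet_frag_secret_rebuild(f);
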
ipfrag_secret_interval sysctl is now obsolete and has been marked as deprecated, it still can be changed so scripts won't be broken but it won't have any effect. A comment is left above each unused secret_timer variable to avoid confusion. Joint work with Nikolay Aleksandrov. Signed-off-by: Florian Westphal Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 10 -------- include/net/inet_frag.h | 4 +-- net/ieee802154/reassembly.c | 5 ++-- net/ipv4/inet_fragment.c | 43 ++++++++++++++++++++++----------- net/ipv4/ip_fragment.c | 5 ++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 - net/ipv6/reassembly.c | 5 ++-- 7 files changed, 40 insertions(+), 33 deletions(-) (limited to 'net/ipv4/inet_fragment.c') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 625c8dda4be7..e8c304e37831 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -111,11 +111,6 @@ ipfrag_low_thresh - INTEGER ipfrag_time - INTEGER Time in seconds to keep an IP fragment in memory. -ipfrag_secret_interval - INTEGER - Regeneration interval (in seconds) of the hash secret (or lifetime - for the hash secret) for IP fragments. - Default: 600 - ipfrag_max_dist - INTEGER ipfrag_max_dist is a non-negative integer value which defines the maximum "disorder" which is allowed among fragments which share a @@ -1164,11 +1159,6 @@ ip6frag_low_thresh - INTEGER ip6frag_time - INTEGER Time in seconds to keep an IPv6 fragment in memory. -ip6frag_secret_interval - INTEGER - Regeneration interval (in seconds) of the hash secret (or lifetime - for the hash secret) for IPv6 fragments. - Default: 600 - conf/default/*: Change the interface-specific default settings. diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 90d21ea62c59..d9cc5bb64854 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -58,11 +58,11 @@ struct inet_frags { * Its primarily a rebuild protection rwlock. */ rwlock_t lock ____cacheline_aligned_in_smp; - int secret_interval; - struct timer_list secret_timer; struct work_struct frags_work; unsigned int next_bucket; + unsigned long last_rebuild_jiffies; + bool rebuild; /* The first call to hashfn is responsible to initialize * rnd. This is best done with net_get_random_once. 
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index b4bc7a50eccf..20d219682d84 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -419,10 +419,12 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int lowpan_frags_secret_interval_unused; static struct ctl_table lowpan_frags_ctl_table[] = { { .procname = "6lowpanfrag_secret_interval", - .data = &lowpan_frags.secret_interval, + .data = &lowpan_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -562,7 +564,6 @@ int __init lowpan_net_frag_init(void) lowpan_frags.qsize = sizeof(struct frag_queue); lowpan_frags.match = lowpan_frag_match; lowpan_frags.frag_expire = lowpan_frag_expire; - lowpan_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&lowpan_frags); return ret; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 198a5ed7a815..58d4c38534f6 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -28,6 +28,9 @@ #define INETFRAGS_EVICT_BUCKETS 128 #define INETFRAGS_EVICT_MAX 512 +/* don't rebuild inetfrag table with new secret more often than this */ +#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ) + /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field @@ -55,16 +58,24 @@ inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) return f->hashfn(q) & (INETFRAGS_HASHSZ - 1); } -static void inet_frag_secret_rebuild(unsigned long dummy) +static bool inet_frag_may_rebuild(struct inet_frags *f) +{ + return time_after(jiffies, + f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL); +} + +static void inet_frag_secret_rebuild(struct inet_frags *f) { - struct inet_frags *f = (struct inet_frags *)dummy; - unsigned long now = jiffies; int i; /* Per bucket lock NOT needed here, due to write lock protection */ - write_lock(&f->lock); + write_lock_bh(&f->lock); + + if (!inet_frag_may_rebuild(f)) + goto out; get_random_bytes(&f->rnd, sizeof(u32)); + for (i = 0; i < INETFRAGS_HASHSZ; i++) { struct inet_frag_bucket *hb; struct inet_frag_queue *q; @@ -85,9 +96,11 @@ static void inet_frag_secret_rebuild(unsigned long dummy) } } } - write_unlock(&f->lock); - mod_timer(&f->secret_timer, now + f->secret_interval); + f->rebuild = false; + f->last_rebuild_jiffies = jiffies; +out: + write_unlock_bh(&f->lock); } static bool inet_fragq_should_evict(const struct inet_frag_queue *q) @@ -162,6 +175,8 @@ static void inet_frag_worker(struct work_struct *work) f->next_bucket = i; read_unlock_bh(&f->lock); + if (f->rebuild && inet_frag_may_rebuild(f)) + inet_frag_secret_rebuild(f); } static void inet_frag_schedule_worker(struct inet_frags *f) @@ -183,11 +198,7 @@ void inet_frags_init(struct inet_frags *f) INIT_HLIST_HEAD(&hb->chain); } rwlock_init(&f->lock); - - setup_timer(&f->secret_timer, inet_frag_secret_rebuild, - (unsigned long)f); - f->secret_timer.expires = jiffies + f->secret_interval; - add_timer(&f->secret_timer); + f->last_rebuild_jiffies = 0; } EXPORT_SYMBOL(inet_frags_init); @@ -199,7 +210,6 @@ EXPORT_SYMBOL(inet_frags_init_net); void inet_frags_fini(struct inet_frags *f) { - del_timer(&f->secret_timer); cancel_work_sync(&f->frags_work); } EXPORT_SYMBOL(inet_frags_fini); @@ -399,8 +409,13 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, if (depth <= INETFRAGS_MAXDEPTH) return 
inet_frag_create(nf, f, key); - else - return ERR_PTR(-ENOBUFS); + + if (inet_frag_may_rebuild(f)) { + f->rebuild = true; + inet_frag_schedule_worker(f); + } + + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8fbeee495037..44e591a7e03f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -720,10 +720,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int ip4_frags_secret_interval_unused; static struct ctl_table ip4_frags_ctl_table[] = { { .procname = "ipfrag_secret_interval", - .data = &ip4_frags.secret_interval, + .data = &ip4_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -853,6 +855,5 @@ void __init ipfrag_init(void) ip4_frags.qsize = sizeof(struct ipq); ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; - ip4_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&ip4_frags); } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index fb0f72a0ff31..3b3ef9774cc2 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -669,7 +669,6 @@ int nf_ct_frag6_init(void) nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; - nf_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&nf_frags); ret = register_pernet_subsys(&nf_ct_net_ops); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index af85551682c2..987fea46b915 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -604,10 +604,12 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { { } }; +/* secret interval has been deprecated */ +static int ip6_frags_secret_interval_unused; static struct ctl_table ip6_frags_ctl_table[] = { { .procname = "ip6frag_secret_interval", - .data = &ip6_frags.secret_interval, + .data = &ip6_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -737,7 +739,6 @@ int __init ipv6_frag_init(void) ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; - ip6_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&ip6_frags); out: return ret; -- cgit v1.2.3 From ab1c724f633080ed2e8a0cfe61654599b55cf8f9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jul 2014 16:50:36 +0200 Subject: inet: frag: use seqlock for hash rebuild rehash is rare operation, don't force readers to take the read-side rwlock. Instead, we only have to detect the (rare) case where the secret was altered while we are trying to insert a new inetfrag queue into the table. If it was changed, drop the bucket lock and recompute the hash to get the 'new' chain bucket that we have to insert into. Joint work with Nikolay Aleksandrov. Signed-off-by: Florian Westphal Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 13 +++-- net/ieee802154/reassembly.c | 1 - net/ipv4/inet_fragment.c | 88 ++++++++++++++++++++------------- net/ipv4/ip_fragment.c | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 1 - 6 files changed, 62 insertions(+), 44 deletions(-) (limited to 'net/ipv4/inet_fragment.c') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index d9cc5bb64854..6f4930a0b660 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -53,11 +53,6 @@ struct inet_frag_bucket { struct inet_frags { struct inet_frag_bucket hash[INETFRAGS_HASHSZ]; - /* This rwlock is a global lock (seperate per IPv4, IPv6 and - * netfilter). Important to keep this on a seperate cacheline. - * Its primarily a rebuild protection rwlock. - */ - rwlock_t lock ____cacheline_aligned_in_smp; struct work_struct frags_work; unsigned int next_bucket; @@ -66,8 +61,12 @@ struct inet_frags { /* The first call to hashfn is responsible to initialize * rnd. This is best done with net_get_random_once. + * + * rnd_seqlock is used to let hash insertion detect + * when it needs to re-lookup the hash chain to use. */ u32 rnd; + seqlock_t rnd_seqlock; int qsize; unsigned int (*hashfn)(const struct inet_frag_queue *); @@ -89,8 +88,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, - struct inet_frags *f, void *key, unsigned int hash) - __releases(&f->lock); + struct inet_frags *f, void *key, unsigned int hash); + void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, const char *prefix); diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index 20d219682d84..8da635d92a58 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -124,7 +124,6 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, arg.src = src; arg.dst = dst; - read_lock(&lowpan_frags.lock); hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); q = inet_frag_find(&ieee802154_lowpan->frags, diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 58d4c38534f6..62b1f73749dc 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -68,8 +68,7 @@ static void inet_frag_secret_rebuild(struct inet_frags *f) { int i; - /* Per bucket lock NOT needed here, due to write lock protection */ - write_lock_bh(&f->lock); + write_seqlock_bh(&f->rnd_seqlock); if (!inet_frag_may_rebuild(f)) goto out; @@ -82,6 +81,8 @@ static void inet_frag_secret_rebuild(struct inet_frags *f) struct hlist_node *n; hb = &f->hash[i]; + spin_lock(&hb->chain_lock); + hlist_for_each_entry_safe(q, n, &hb->chain, list) { unsigned int hval = inet_frag_hashfn(f, q); @@ -92,15 +93,28 @@ static void inet_frag_secret_rebuild(struct inet_frags *f) /* Relink to new hash chain. */ hb_dest = &f->hash[hval]; + + /* This is the only place where we take + * another chain_lock while already holding + * one. As this will not run concurrently, + * we cannot deadlock on hb_dest lock below, if its + * already locked it will be released soon since + * other caller cannot be waiting for hb lock + * that we've taken above. 
+ */ + spin_lock_nested(&hb_dest->chain_lock, + SINGLE_DEPTH_NESTING); hlist_add_head(&q->list, &hb_dest->chain); + spin_unlock(&hb_dest->chain_lock); } } + spin_unlock(&hb->chain_lock); } f->rebuild = false; f->last_rebuild_jiffies = jiffies; out: - write_unlock_bh(&f->lock); + write_sequnlock_bh(&f->rnd_seqlock); } static bool inet_fragq_should_evict(const struct inet_frag_queue *q) @@ -163,7 +177,7 @@ static void inet_frag_worker(struct work_struct *work) BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); - read_lock_bh(&f->lock); + local_bh_disable(); for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { evicted += inet_evict_bucket(f, &f->hash[i]); @@ -174,7 +188,8 @@ static void inet_frag_worker(struct work_struct *work) f->next_bucket = i; - read_unlock_bh(&f->lock); + local_bh_enable(); + if (f->rebuild && inet_frag_may_rebuild(f)) inet_frag_secret_rebuild(f); } @@ -197,7 +212,8 @@ void inet_frags_init(struct inet_frags *f) spin_lock_init(&hb->chain_lock); INIT_HLIST_HEAD(&hb->chain); } - rwlock_init(&f->lock); + + seqlock_init(&f->rnd_seqlock); f->last_rebuild_jiffies = 0; } EXPORT_SYMBOL(inet_frags_init); @@ -216,35 +232,56 @@ EXPORT_SYMBOL(inet_frags_fini); void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) { + unsigned int seq; int i; nf->low_thresh = 0; + local_bh_disable(); - read_lock_bh(&f->lock); +evict_again: + seq = read_seqbegin(&f->rnd_seqlock); for (i = 0; i < INETFRAGS_HASHSZ ; i++) inet_evict_bucket(f, &f->hash[i]); - read_unlock_bh(&f->lock); + if (read_seqretry(&f->rnd_seqlock, seq)) + goto evict_again; + + local_bh_enable(); percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); -static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) +static struct inet_frag_bucket * +get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f) +__acquires(hb->chain_lock) { struct inet_frag_bucket *hb; - unsigned int hash; + unsigned int seq, hash; + + restart: + seq = read_seqbegin(&f->rnd_seqlock); - read_lock(&f->lock); hash = inet_frag_hashfn(f, fq); hb = &f->hash[hash]; spin_lock(&hb->chain_lock); + if (read_seqretry(&f->rnd_seqlock, seq)) { + spin_unlock(&hb->chain_lock); + goto restart; + } + + return hb; +} + +static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) +{ + struct inet_frag_bucket *hb; + + hb = get_frag_bucket_locked(fq, f); hlist_del(&fq->list); spin_unlock(&hb->chain_lock); - - read_unlock(&f->lock); } void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) @@ -300,30 +337,18 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, void *arg) { - struct inet_frag_bucket *hb; + struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f); struct inet_frag_queue *qp; - unsigned int hash; - - read_lock(&f->lock); /* Protects against hash rebuild */ - /* - * While we stayed w/o the lock other CPU could update - * the rnd seed, so we need to re-calculate the hash - * chain. Fortunatelly the qp_in can be used to get one. - */ - hash = inet_frag_hashfn(f, qp_in); - hb = &f->hash[hash]; - spin_lock(&hb->chain_lock); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because - * such entry could be created on other cpu, while we - * released the hash bucket lock. + * such entry could have been created on other cpu before + * we acquired hash bucket lock. 
*/ hlist_for_each_entry(qp, &hb->chain, list) { if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); spin_unlock(&hb->chain_lock); - read_unlock(&f->lock); qp_in->last_in |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); return qp; @@ -338,7 +363,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, hlist_add_head(&qp->list, &hb->chain); spin_unlock(&hb->chain_lock); - read_unlock(&f->lock); return qp; } @@ -382,7 +406,6 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) - __releases(&f->lock) { struct inet_frag_bucket *hb; struct inet_frag_queue *q; @@ -399,19 +422,18 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); spin_unlock(&hb->chain_lock); - read_unlock(&f->lock); return q; } depth++; } spin_unlock(&hb->chain_lock); - read_unlock(&f->lock); if (depth <= INETFRAGS_MAXDEPTH) return inet_frag_create(nf, f, key); if (inet_frag_may_rebuild(f)) { - f->rebuild = true; + if (!f->rebuild) + f->rebuild = true; inet_frag_schedule_worker(f); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 44e591a7e03f..ccee68dffd6e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -244,7 +244,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; - read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3b3ef9774cc2..4d9da1e35f8c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -193,7 +193,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.dst = dst; arg.ecn = ecn; - read_lock_bh(&nf_frags.lock); + local_bh_disable(); hash = nf_hash_frag(id, src, dst); q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 987fea46b915..57a9707b2032 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -190,7 +190,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.dst = dst; arg.ecn = ecn; - read_lock(&ip6_frags.lock); hash = inet6_hash_frag(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); -- cgit v1.2.3 From 06aa8b8a0345c78f4d9a1fb3f852952b12a0e40c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 1 Aug 2014 12:29:44 +0200 Subject: inet: frags: rename last_in to flags The last_in field has been used to store various flags different from first/last frag in so give it a more descriptive name: flags. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/net/inet_frag.h | 2 +- net/ieee802154/reassembly.c | 14 +++++++------- net/ipv4/inet_fragment.c | 14 +++++++------- net/ipv4/ip_fragment.c | 20 ++++++++++---------- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 ++++++------ net/ipv6/reassembly.c | 18 +++++++++--------- 6 files changed, 40 insertions(+), 40 deletions(-) (limited to 'net/ipv4/inet_fragment.c') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 6f4930a0b660..5024d6c20407 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -25,7 +25,7 @@ struct inet_frag_queue { ktime_t stamp; int len; /* total length of orig datagram */ int meat; - __u8 last_in; /* first/last segment arrived? */ + __u8 flags; /* first/last segment arrived? */ #define INET_FRAG_EVICTED 8 #define INET_FRAG_COMPLETE 4 diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index f13d4f32e207..5607accd2fee 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -99,7 +99,7 @@ static void lowpan_frag_expire(unsigned long data) spin_lock(&fq->q.lock); - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto out; inet_frag_kill(&fq->q, &lowpan_frags); @@ -142,7 +142,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, struct net_device *dev; int end, offset; - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto err; offset = lowpan_cb(skb)->d_offset << 3; @@ -154,14 +154,14 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) goto err; - fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & INET_FRAG_LAST_IN) + if (fq->q.flags & INET_FRAG_LAST_IN) goto err; fq->q.len = end; } @@ -201,13 +201,13 @@ found: if (frag_type == LOWPAN_DISPATCH_FRAG1) { /* Calculate uncomp. 
6lowpan header to estimate full size */ fq->q.meat += lowpan_uncompress_size(skb, NULL); - fq->q.last_in |= INET_FRAG_FIRST_IN; + fq->q.flags |= INET_FRAG_FIRST_IN; } else { fq->q.meat += skb->len; } add_frag_mem_limit(&fq->q, skb->truesize); - if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { int res; unsigned long orefdst = skb->_skb_refdst; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 62b1f73749dc..e3ebc6608e5d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -152,8 +152,8 @@ evict_again: } /* suppress xmit of (icmp) error packet */ - fq->last_in &= ~INET_FRAG_FIRST_IN; - fq->last_in |= INET_FRAG_EVICTED; + fq->flags &= ~INET_FRAG_FIRST_IN; + fq->flags |= INET_FRAG_EVICTED; hlist_del(&fq->list); hlist_add_head(&fq->list, &expired); ++evicted; @@ -289,16 +289,16 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) if (del_timer(&fq->timer)) atomic_dec(&fq->refcnt); - if (!(fq->last_in & INET_FRAG_COMPLETE)) { + if (!(fq->flags & INET_FRAG_COMPLETE)) { fq_unlink(fq, f); atomic_dec(&fq->refcnt); - fq->last_in |= INET_FRAG_COMPLETE; + fq->flags |= INET_FRAG_COMPLETE; } } EXPORT_SYMBOL(inet_frag_kill); static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb) + struct sk_buff *skb) { if (f->skb_free) f->skb_free(skb); @@ -311,7 +311,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) struct netns_frags *nf; unsigned int sum, sum_truesize = 0; - WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); + WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ @@ -349,7 +349,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, if (qp->net == nf && f->match(qp, arg)) { atomic_inc(&qp->refcnt); spin_unlock(&hb->chain_lock); - qp_in->last_in |= INET_FRAG_COMPLETE; + qp_in->flags |= INET_FRAG_COMPLETE; inet_frag_put(qp_in, f); return qp; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 634fc31aa243..6fce1ecc5bca 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -185,16 +185,16 @@ static void ip_expire(unsigned long arg) spin_lock(&qp->q.lock); - if (qp->q.last_in & INET_FRAG_COMPLETE) + if (qp->q.flags & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); - if (!(qp->q.last_in & INET_FRAG_EVICTED)) + if (!(qp->q.flags & INET_FRAG_EVICTED)) IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { + if ((qp->q.flags & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -302,7 +302,7 @@ static int ip_frag_reinit(struct ipq *qp) } while (fp); sub_frag_mem_limit(&qp->q, sum_truesize); - qp->q.last_in = 0; + qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; qp->q.fragments = NULL; @@ -323,7 +323,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) int err = -ENOENT; u8 ecn; - if (qp->q.last_in & INET_FRAG_COMPLETE) + if (qp->q.flags & INET_FRAG_COMPLETE) goto err; if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && @@ -350,9 +350,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * or have different end, the segment is corrupted. 
*/ if (end < qp->q.len || - ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) + ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len)) goto err; - qp->q.last_in |= INET_FRAG_LAST_IN; + qp->q.flags |= INET_FRAG_LAST_IN; qp->q.len = end; } else { if (end&7) { @@ -362,7 +362,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ - if (qp->q.last_in & INET_FRAG_LAST_IN) + if (qp->q.flags & INET_FRAG_LAST_IN) goto err; qp->q.len = end; } @@ -471,13 +471,13 @@ found: qp->ecn |= ecn; add_frag_mem_limit(&qp->q, skb->truesize); if (offset == 0) - qp->q.last_in |= INET_FRAG_FIRST_IN; + qp->q.flags |= INET_FRAG_FIRST_IN; if (ip_hdr(skb)->frag_off & htons(IP_DF) && skb->len + ihl > qp->q.max_size) qp->q.max_size = skb->len + ihl; - if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && qp->q.meat == qp->q.len) { unsigned long orefdst = skb->_skb_refdst; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3d4bccf6d67d..cca686e42b97 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -222,7 +222,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, int offset, end; u8 ecn; - if (fq->q.last_in & INET_FRAG_COMPLETE) { + if (fq->q.flags & INET_FRAG_COMPLETE) { pr_debug("Already completed\n"); goto err; } @@ -253,11 +253,11 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) { + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) { pr_debug("already received last fragment\n"); goto err; } - fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. @@ -272,7 +272,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } if (end > fq->q.len) { /* Some bits beyond end -> corruption. 
*/ - if (fq->q.last_in & INET_FRAG_LAST_IN) { + if (fq->q.flags & INET_FRAG_LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } @@ -354,7 +354,7 @@ found: */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= INET_FRAG_FIRST_IN; + fq->q.flags |= INET_FRAG_FIRST_IN; } return 0; @@ -617,7 +617,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) goto ret_orig; } - if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 512ccc027ce3..b4baceed0d0d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -131,7 +131,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, spin_lock(&fq->q.lock); - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto out; inet_frag_kill(&fq->q, frags); @@ -141,13 +141,13 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, if (!dev) goto out_rcu_unlock; - if (!(fq->q.last_in & INET_FRAG_EVICTED)) + if (!(fq->q.flags & INET_FRAG_EVICTED)) IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); /* Don't send error if the first segment did not arrive. */ - if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) + if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) goto out_rcu_unlock; /* @@ -209,7 +209,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net *net = dev_net(skb_dst(skb)->dev); u8 ecn; - if (fq->q.last_in & INET_FRAG_COMPLETE) + if (fq->q.flags & INET_FRAG_COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -240,9 +240,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) + ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) goto err; - fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. @@ -260,7 +260,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & INET_FRAG_LAST_IN) + if (fq->q.flags & INET_FRAG_LAST_IN) goto err; fq->q.len = end; } @@ -335,10 +335,10 @@ found: */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= INET_FRAG_FIRST_IN; + fq->q.flags |= INET_FRAG_FIRST_IN; } - if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { int res; unsigned long orefdst = skb->_skb_refdst; -- cgit v1.2.3 From f926e23660d52601089222cb4755aabc693ca390 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 1 Aug 2014 12:29:46 +0200 Subject: inet: frags: fix function declaration alignments in inet_fragment Fix a couple of functions' declaration alignments. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
From f926e23660d52601089222cb4755aabc693ca390 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov
Date: Fri, 1 Aug 2014 12:29:46 +0200
Subject: inet: frags: fix function declaration alignments in inet_fragment

Fix a couple of functions' declaration alignments.

Signed-off-by: Nikolay Aleksandrov
Signed-off-by: David S. Miller
---
 net/ipv4/inet_fragment.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'net/ipv4/inet_fragment.c')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e3ebc6608e5d..fa49916c23a0 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -334,8 +334,9 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
 EXPORT_SYMBOL(inet_frag_destroy);
 static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
-        struct inet_frag_queue *qp_in, struct inet_frags *f,
-        void *arg)
+                                                struct inet_frag_queue *qp_in,
+                                                struct inet_frags *f,
+                                                void *arg)
 {
     struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
     struct inet_frag_queue *qp;
@@ -368,7 +369,8 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 }
 static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
-        struct inet_frags *f, void *arg)
+                                               struct inet_frags *f,
+                                               void *arg)
 {
     struct inet_frag_queue *q;
@@ -393,7 +395,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 }
 static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
-        struct inet_frags *f, void *arg)
+                                                struct inet_frags *f,
+                                                void *arg)
 {
     struct inet_frag_queue *q;
@@ -405,7 +408,8 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 }
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
-        struct inet_frags *f, void *key, unsigned int hash)
+                                       struct inet_frags *f, void *key,
+                                       unsigned int hash)
 {
     struct inet_frag_bucket *hb;
     struct inet_frag_queue *q;
-- cgit v1.2.3

From 2e404f632f44979ddf0ce0808a438249a72d7015 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov
Date: Fri, 1 Aug 2014 12:29:47 +0200
Subject: inet: frags: use INET_FRAG_EVICTED to prevent icmp messages

Now that we have INET_FRAG_EVICTED we might as well use it to stop
sending icmp messages in the "frag_expire" functions instead of
stripping INET_FRAG_FIRST_IN from their flags when evicting.
Also fix the comment style in ip6_expire_frag_queue().

Signed-off-by: Nikolay Aleksandrov
Reviewed-by: Florian Westphal
Signed-off-by: David S. Miller
---
 net/ipv4/inet_fragment.c |  2 --
 net/ipv4/ip_fragment.c   | 14 +++++++-------
 net/ipv6/reassembly.c    | 15 ++++++++-------
 3 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'net/ipv4/inet_fragment.c')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index fa49916c23a0..4baa76c60398 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -151,8 +151,6 @@ evict_again:
             goto evict_again;
         }
-        /* suppress xmit of (icmp) error packet */
-        fq->flags &= ~INET_FRAG_FIRST_IN;
         fq->flags |= INET_FRAG_EVICTED;
         hlist_del(&fq->list);
         hlist_add_head(&fq->list, &expired);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 6fce1ecc5bca..cb56bcc1eee2 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -189,16 +189,18 @@ static void ip_expire(unsigned long arg)
         goto out;
     ipq_kill(qp);
-
-    if (!(qp->q.flags & INET_FRAG_EVICTED))
-        IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
     IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
-    if ((qp->q.flags & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
+    if (!(qp->q.flags & INET_FRAG_EVICTED)) {
         struct sk_buff *head = qp->q.fragments;
         const struct iphdr *iph;
         int err;
+        IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+
+        if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
+            goto out;
+
         rcu_read_lock();
         head->dev = dev_get_by_index_rcu(net, qp->iif);
         if (!head->dev)
@@ -211,8 +213,7 @@ static void ip_expire(unsigned long arg)
         if (err)
             goto out_rcu_unlock;
-        /*
-         * Only an end host needs to send an ICMP
+        /* Only an end host needs to send an ICMP
          * "Fragment Reassembly Timeout" message, per RFC792.
          */
         if (qp->user == IP_DEFRAG_AF_PACKET ||
@@ -221,7 +222,6 @@ static void ip_expire(unsigned long arg)
              (skb_rtable(head)->rt_type != RTN_LOCAL)))
             goto out_rcu_unlock;
-        /* Send an ICMP "Fragment Reassembly Timeout" message. */
         icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
 out_rcu_unlock:
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index b4baceed0d0d..beb6872a8fa5 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -141,19 +141,20 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
     if (!dev)
         goto out_rcu_unlock;
-    if (!(fq->q.flags & INET_FRAG_EVICTED))
-        IP6_INC_STATS_BH(net, __in6_dev_get(dev),
-                         IPSTATS_MIB_REASMTIMEOUT);
     IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+    if (fq->q.flags & INET_FRAG_EVICTED)
+        goto out_rcu_unlock;
+
+    IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
     /* Don't send error if the first segment did not arrive. */
     if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
         goto out_rcu_unlock;
-    /*
-       But use as source device on which LAST ARRIVED
-       segment was received. And do not use fq->dev
-       pointer directly, device might already disappeared.
+    /* But use as source device on which LAST ARRIVED
+     * segment was received. And do not use fq->dev
+     * pointer directly, device might already disappeared.
      */
     fq->q.fragments->dev = dev;
     icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-- cgit v1.2.3
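With this change the expire handlers key their "time exceeded" decision off INET_FRAG_EVICTED instead of clearing INET_FRAG_FIRST_IN: a queue torn down by the memory evictor is dropped silently, while a genuine reassembly timeout still reports an error, provided the first fragment arrived. Below is a rough userspace sketch of that decision logic only; send_reassembly_timeout_error() is a hypothetical stand-in for icmp_send()/icmpv6_send(), not a real kernel helper, and the flag values are illustrative.

#include <stdbool.h>
#include <stdio.h>

#define FRAG_FIRST_IN  0x01    /* first fragment (offset 0) was received */
#define FRAG_EVICTED   0x08    /* set by the worker that evicts under memory pressure */

struct expiring_queue {
    unsigned int flags;
    bool has_fragments;        /* stands in for q->fragments != NULL */
};

/* Hypothetical stand-in for icmp_send()/icmpv6_send() in the real handlers. */
static void send_reassembly_timeout_error(void)
{
    puts("ICMP time-exceeded (fragment reassembly timeout) sent");
}

/* Mirrors the shape of ip_expire()/ip6_expire_frag_queue() after this patch. */
static void frag_expire(struct expiring_queue *q)
{
    if (q->flags & FRAG_EVICTED)
        return;                /* evicted, not timed out: stay silent */

    /* Don't send an error if the first segment never arrived. */
    if (!(q->flags & FRAG_FIRST_IN) || !q->has_fragments)
        return;

    send_reassembly_timeout_error();
}

int main(void)
{
    struct expiring_queue evicted  = { .flags = FRAG_FIRST_IN | FRAG_EVICTED,
                                       .has_fragments = true };
    struct expiring_queue timedout = { .flags = FRAG_FIRST_IN,
                                       .has_fragments = true };

    frag_expire(&evicted);     /* prints nothing */
    frag_expire(&timedout);    /* reports the timeout */
    return 0;
}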
From d4ad4d22e7ac6b8711b35d7e86eb29f03f8ac153 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov
Date: Fri, 1 Aug 2014 12:29:48 +0200
Subject: inet: frags: use kmem_cache for inet_frag_queue

Use kmem_cache to allocate/free inet_frag_queue objects since they're
all the same size per inet_frags user and are alloced/freed in high
volumes thus making it a perfect case for kmem_cache.

Signed-off-by: Nikolay Aleksandrov
Acked-by: Florian Westphal
Signed-off-by: David S. Miller
---
 include/net/inet_frag.h                 |  4 +++-
 net/ieee802154/reassembly.c             |  7 ++++++-
 net/ipv4/inet_fragment.c                | 13 ++++++++++---
 net/ipv4/ip_fragment.c                  |  5 ++++-
 net/ipv6/netfilter/nf_conntrack_reasm.c |  8 ++++++--
 net/ipv6/reassembly.c                   |  7 ++++++-
 6 files changed, 35 insertions(+), 9 deletions(-)

(limited to 'net/ipv4/inet_fragment.c')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 90015c47b447..65a8855e99fe 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -101,9 +101,11 @@ struct inet_frags {
     void (*destructor)(struct inet_frag_queue *);
     void (*skb_free)(struct sk_buff *);
     void (*frag_expire)(unsigned long data);
+    struct kmem_cache *frags_cachep;
+    const char *frags_cache_name;
 };
-void inet_frags_init(struct inet_frags *);
+int inet_frags_init(struct inet_frags *);
 void inet_frags_fini(struct inet_frags *);
 void inet_frags_init_net(struct netns_frags *nf);
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index 5607accd2fee..ffec6ce51005 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -30,6 +30,8 @@
 #include "reassembly.h"
+static const char lowpan_frags_cache_name[] = "lowpan-frags";
+
 struct lowpan_frag_info {
     __be16 d_tag;
     u16 d_size;
@@ -571,7 +573,10 @@ int __init lowpan_net_frag_init(void)
     lowpan_frags.qsize = sizeof(struct frag_queue);
     lowpan_frags.match = lowpan_frag_match;
     lowpan_frags.frag_expire = lowpan_frag_expire;
-    inet_frags_init(&lowpan_frags);
+    lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+    ret = inet_frags_init(&lowpan_frags);
+    if (ret)
+        goto err_pernet;
     return ret;
 err_pernet:
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4baa76c60398..9eb89f3f0ee4 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -198,7 +198,7 @@ static void inet_frag_schedule_worker(struct inet_frags *f)
     schedule_work(&f->frags_work);
 }
-void inet_frags_init(struct inet_frags *f)
+int inet_frags_init(struct inet_frags *f)
 {
     int i;
@@ -213,6 +213,12 @@ void inet_frags_init(struct inet_frags *f)
     seqlock_init(&f->rnd_seqlock);
     f->last_rebuild_jiffies = 0;
+
+    f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
+                                        NULL);
+    if (!f->frags_cachep)
+        return -ENOMEM;
+
+    return 0;
 }
 EXPORT_SYMBOL(inet_frags_init);
@@ -225,6 +231,7 @@ EXPORT_SYMBOL(inet_frags_init_net);
 void inet_frags_fini(struct inet_frags *f)
 {
     cancel_work_sync(&f->frags_work);
+    kmem_cache_destroy(f->frags_cachep);
 }
 EXPORT_SYMBOL(inet_frags_fini);
@@ -327,7 +334,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
     if (f->destructor)
         f->destructor(q);
-    kfree(q);
+    kmem_cache_free(f->frags_cachep, q);
 }
 EXPORT_SYMBOL(inet_frag_destroy);
@@ -377,7 +384,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
         return NULL;
     }
-    q = kzalloc(f->qsize, GFP_ATOMIC);
+    q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
     if (q == NULL)
         return NULL;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cb56bcc1eee2..15f0e2bad7ad 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -55,6 +55,7 @@
  */
 static int sysctl_ipfrag_max_dist __read_mostly = 64;
+static const char ip_frag_cache_name[] = "ip4-frags";
 struct ipfrag_skb_cb {
@@ -860,5 +861,7 @@ void __init ipfrag_init(void)
     ip4_frags.qsize = sizeof(struct ipq);
     ip4_frags.match = ip4_frag_match;
     ip4_frags.frag_expire = ip_expire;
-    inet_frags_init(&ip4_frags);
+    ip4_frags.frags_cache_name = ip_frag_cache_name;
+    if (inet_frags_init(&ip4_frags))
+        panic("IP: failed to allocate ip4_frags cache\n");
 }
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index cca686e42b97..6f187c8d8a1b 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -50,6 +50,7 @@
 #include
 #include
+static const char nf_frags_cache_name[] = "nf-frags";
 struct nf_ct_frag6_skb_cb {
@@ -677,12 +678,15 @@ int nf_ct_frag6_init(void)
     nf_frags.qsize = sizeof(struct frag_queue);
     nf_frags.match = ip6_frag_match;
     nf_frags.frag_expire = nf_ct_frag6_expire;
-    inet_frags_init(&nf_frags);
-
+    nf_frags.frags_cache_name = nf_frags_cache_name;
+    ret = inet_frags_init(&nf_frags);
+    if (ret)
+        goto out;
     ret = register_pernet_subsys(&nf_ct_net_ops);
     if (ret)
         inet_frags_fini(&nf_frags);
+out:
     return ret;
 }
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index beb6872a8fa5..c6557d9f7808 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -60,6 +60,8 @@
 #include
 #include
+static const char ip6_frag_cache_name[] = "ip6-frags";
+
 struct ip6frag_skb_cb {
     struct inet6_skb_parm h;
@@ -748,7 +750,10 @@ int __init ipv6_frag_init(void)
     ip6_frags.qsize = sizeof(struct frag_queue);
     ip6_frags.match = ip6_frag_match;
     ip6_frags.frag_expire = ip6_frag_expire;
-    inet_frags_init(&ip6_frags);
+    ip6_frags.frags_cache_name = ip6_frag_cache_name;
+    ret = inet_frags_init(&ip6_frags);
+    if (ret)
+        goto err_pernet;
 out:
     return ret;
-- cgit v1.2.3
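For reference, the allocation lifecycle this last patch wires up can be summarised in a short sketch. This is kernel-style code, not a standalone program, and the structure and function names below are simplified stand-ins for struct inet_frags and its helpers; the slab calls themselves (kmem_cache_create(), kmem_cache_zalloc(), kmem_cache_free(), kmem_cache_destroy()) are the real <linux/slab.h> API the patch switches to.

#include <linux/slab.h>
#include <linux/errno.h>

/* Simplified stand-in for the relevant parts of struct inet_frags. */
struct frags_ctx {
    unsigned int qsize;           /* per-user queue size, fixed at init time */
    const char *cache_name;       /* e.g. "ip4-frags", "ip6-frags", "nf-frags" */
    struct kmem_cache *cachep;
};

/* Mirrors what inet_frags_init() now does: one slab cache per inet_frags user. */
static int frags_ctx_init(struct frags_ctx *ctx)
{
    ctx->cachep = kmem_cache_create(ctx->cache_name, ctx->qsize, 0, 0, NULL);
    if (!ctx->cachep)
        return -ENOMEM;
    return 0;
}

/* inet_frag_alloc(): fixed-size, high-volume objects come from the cache... */
static void *frags_queue_alloc(struct frags_ctx *ctx)
{
    return kmem_cache_zalloc(ctx->cachep, GFP_ATOMIC);
}

/* ...and inet_frag_destroy() returns them to it instead of calling kfree(). */
static void frags_queue_free(struct frags_ctx *ctx, void *q)
{
    kmem_cache_free(ctx->cachep, q);
}

/* inet_frags_fini(): tear the cache down when the protocol unregisters. */
static void frags_ctx_fini(struct frags_ctx *ctx)
{
    kmem_cache_destroy(ctx->cachep);
}

Because each reassembly user (IPv4, IPv6, nf_conntrack, 6LoWPAN) names its own cache, the queues also become individually visible in slab statistics, which the shared kzalloc()/kfree() path did not provide.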