From e64457191a259537bbbfaebeba9a8043786af96f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 3 Oct 2013 18:16:47 -0700 Subject: openvswitch: Restructure datapath.c and flow.c Over the time datapath.c and flow.c has became pretty large files. Following patch restructures functionality of component into three different components: flow.c: contains flow extract. flow_netlink.c: netlink flow api. flow_table.c: flow table api. This patch restructures code without changing logic. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/flow_table.c | 517 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 517 insertions(+) create mode 100644 net/openvswitch/flow_table.c (limited to 'net/openvswitch/flow_table.c') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c new file mode 100644 index 000000000000..dcadb75bb173 --- /dev/null +++ b/net/openvswitch/flow_table.c @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "flow.h" +#include "datapath.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *flow_cache; + +static u16 range_n_bytes(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + +void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, + const struct sw_flow_mask *mask) +{ + const long *m = (long *)((u8 *)&mask->key + mask->range.start); + const long *s = (long *)((u8 *)src + mask->range.start); + long *d = (long *)((u8 *)dst + mask->range.start); + int i; + + /* The memory outside of the 'mask->range' are not set since + * further operations on 'dst' only uses contents within + * 'mask->range'. + */ + for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + *d++ = *s++ & *m++; +} + +struct sw_flow *ovs_flow_alloc(void) +{ + struct sw_flow *flow; + + flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&flow->lock); + flow->sf_acts = NULL; + flow->mask = NULL; + + return flow; +} + +static struct flex_array *alloc_buckets(unsigned int n_buckets) +{ + struct flex_array *buckets; + int i, err; + + buckets = flex_array_alloc(sizeof(struct hlist_head), + n_buckets, GFP_KERNEL); + if (!buckets) + return NULL; + + err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); + if (err) { + flex_array_free(buckets); + return NULL; + } + + for (i = 0; i < n_buckets; i++) + INIT_HLIST_HEAD((struct hlist_head *) + flex_array_get(buckets, i)); + + return buckets; +} + +static void flow_free(struct sw_flow *flow) +{ + kfree((struct sf_flow_acts __force *)flow->sf_acts); + kmem_cache_free(flow_cache, flow); +} + +static void rcu_free_flow_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + + flow_free(flow); +} + +void ovs_flow_free(struct sw_flow *flow, bool deferred) +{ + if (!flow) + return; + + ovs_sw_flow_mask_del_ref(flow->mask, deferred); + + if (deferred) + call_rcu(&flow->rcu, rcu_free_flow_callback); + else + flow_free(flow); +} + +static void free_buckets(struct flex_array *buckets) +{ + flex_array_free(buckets); +} + +static void __flow_tbl_destroy(struct flow_table *table) +{ + int i; + + if (table->keep_flows) + goto skip_flows; + + for (i = 0; i < table->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head = flex_array_get(table->buckets, i); + struct hlist_node *n; + int ver = table->node_ver; + + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { + hlist_del(&flow->hash_node[ver]); + ovs_flow_free(flow, false); + } + } + + BUG_ON(!list_empty(table->mask_list)); + kfree(table->mask_list); + +skip_flows: + free_buckets(table->buckets); + kfree(table); +} + +static struct flow_table *__flow_tbl_alloc(int new_size) +{ + struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + table->buckets = alloc_buckets(new_size); + + if (!table->buckets) { + kfree(table); + return NULL; + } + table->n_buckets = new_size; + table->count = 0; + table->node_ver = 0; + table->keep_flows = false; + get_random_bytes(&table->hash_seed, sizeof(u32)); + table->mask_list = NULL; + + return table; +} + +struct flow_table *ovs_flow_tbl_alloc(int new_size) +{ + struct flow_table *table = __flow_tbl_alloc(new_size); + + if (!table) + return NULL; + + table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!table->mask_list) { + table->keep_flows = true; + __flow_tbl_destroy(table); + return NULL; + } + INIT_LIST_HEAD(table->mask_list); + + return table; +} + +static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) +{ + struct flow_table *table = container_of(rcu, struct flow_table, rcu); + + __flow_tbl_destroy(table); +} + +void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) +{ + if (!table) + return; + + if (deferred) + call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); + else + __flow_tbl_destroy(table); +} + +struct sw_flow *ovs_flow_tbl_dump_next(struct flow_table *table, + u32 *bucket, u32 *last) +{ + struct sw_flow *flow; + struct hlist_head *head; + int ver; + int i; + + ver = table->node_ver; + while (*bucket < table->n_buckets) { + i = 0; + head = flex_array_get(table->buckets, *bucket); + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + if (i < *last) { + i++; + continue; + } + *last = i + 1; + return flow; + } + (*bucket)++; + *last = 0; + } + + return NULL; +} + +static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) +{ + hash = jhash_1word(hash, table->hash_seed); + return flex_array_get(table->buckets, + (hash & (table->n_buckets - 1))); +} + +static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(table, flow->hash); + hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); + + table->count++; +} + +static void flow_table_copy_flows(struct flow_table *old, + struct flow_table *new) +{ + int old_ver; + int i; + + old_ver = old->node_ver; + new->node_ver = !old_ver; + + /* Insert in new table. */ + for (i = 0; i < old->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head; + + head = flex_array_get(old->buckets, i); + + hlist_for_each_entry(flow, head, hash_node[old_ver]) + __tbl_insert(new, flow); + } + + new->mask_list = old->mask_list; + old->keep_flows = true; +} + +static struct flow_table *__flow_tbl_rehash(struct flow_table *table, + int n_buckets) +{ + struct flow_table *new_table; + + new_table = __flow_tbl_alloc(n_buckets); + if (!new_table) + return ERR_PTR(-ENOMEM); + + flow_table_copy_flows(table, new_table); + + return new_table; +} + +struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) +{ + return __flow_tbl_rehash(table, table->n_buckets); +} + +struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) +{ + return __flow_tbl_rehash(table, table->n_buckets * 2); +} + +static u32 flow_hash(const struct sw_flow_key *key, int key_start, + int key_end) +{ + u32 *hash_key = (u32 *)((u8 *)key + key_start); + int hash_u32s = (key_end - key_start) >> 2; + + /* Make sure number of hash bytes are multiple of u32. */ + BUILD_BUG_ON(sizeof(long) % sizeof(u32)); + + return jhash2(hash_key, hash_u32s, 0); +} + +static int flow_key_start(const struct sw_flow_key *key) +{ + if (key->tun_key.ipv4_dst) + return 0; + else + return rounddown(offsetof(struct sw_flow_key, phy), + sizeof(long)); +} + +static bool cmp_key(const struct sw_flow_key *key1, + const struct sw_flow_key *key2, + int key_start, int key_end) +{ + const long *cp1 = (long *)((u8 *)key1 + key_start); + const long *cp2 = (long *)((u8 *)key2 + key_start); + long diffs = 0; + int i; + + for (i = key_start; i < key_end; i += sizeof(long)) + diffs |= *cp1++ ^ *cp2++; + + return diffs == 0; +} + +static bool flow_cmp_masked_key(const struct sw_flow *flow, + const struct sw_flow_key *key, + int key_start, int key_end) +{ + return cmp_key(&flow->key, key, key_start, key_end); +} + +bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + struct sw_flow_match *match) +{ + struct sw_flow_key *key = match->key; + int key_start = flow_key_start(key); + int key_end = match->range.end; + + return cmp_key(&flow->unmasked_key, key, key_start, key_end); +} + +static struct sw_flow *masked_flow_lookup(struct flow_table *table, + const struct sw_flow_key *unmasked, + struct sw_flow_mask *mask) +{ + struct sw_flow *flow; + struct hlist_head *head; + int key_start = mask->range.start; + int key_end = mask->range.end; + u32 hash; + struct sw_flow_key masked_key; + + ovs_flow_mask_key(&masked_key, unmasked, mask); + hash = flow_hash(&masked_key, key_start, key_end); + head = find_bucket(table, hash); + hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { + if (flow->mask == mask && + flow_cmp_masked_key(flow, &masked_key, + key_start, key_end)) + return flow; + } + return NULL; +} + +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + struct sw_flow *flow = NULL; + struct sw_flow_mask *mask; + + list_for_each_entry_rcu(mask, tbl->mask_list, list) { + flow = masked_flow_lookup(tbl, key, mask); + if (flow) /* Found */ + break; + } + + return flow; +} + +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) +{ + flow->hash = flow_hash(&flow->key, flow->mask->range.start, + flow->mask->range.end); + __tbl_insert(table, flow); +} + +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) +{ + BUG_ON(table->count == 0); + hlist_del_rcu(&flow->hash_node[table->node_ver]); + table->count--; +} + +struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) +{ + struct sw_flow_mask *mask; + + mask = kmalloc(sizeof(*mask), GFP_KERNEL); + if (mask) + mask->ref_count = 0; + + return mask; +} + +void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) +{ + mask->ref_count++; +} + +static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) +{ + struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); + + kfree(mask); +} + +void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) +{ + if (!mask) + return; + + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + if (deferred) + call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); + else + kfree(mask); + } +} + +static bool mask_equal(const struct sw_flow_mask *a, + const struct sw_flow_mask *b) +{ + u8 *a_ = (u8 *)&a->key + a->range.start; + u8 *b_ = (u8 *)&b->key + b->range.start; + + return (a->range.end == b->range.end) + && (a->range.start == b->range.start) + && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); +} + +struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, + const struct sw_flow_mask *mask) +{ + struct list_head *ml; + + list_for_each(ml, tbl->mask_list) { + struct sw_flow_mask *m; + m = container_of(ml, struct sw_flow_mask, list); + if (mask_equal(mask, m)) + return m; + } + + return NULL; +} + +/** + * add a new mask into the mask list. + * The caller needs to make sure that 'mask' is not the same + * as any masks that are already on the list. + */ +void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) +{ + list_add_rcu(&mask->list, tbl->mask_list); +} + +/* Initializes the flow module. + * Returns zero if successful or a negative error code. */ +int ovs_flow_init(void) +{ + BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); + BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); + + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, + 0, NULL); + if (flow_cache == NULL) + return -ENOMEM; + + return 0; +} + +/* Uninitializes the flow module. */ +void ovs_flow_exit(void) +{ + kmem_cache_destroy(flow_cache); +} -- cgit v1.2.3 From b637e4988c2d689bb43f943a5af0e684a4981159 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 4 Oct 2013 00:14:23 -0700 Subject: openvswitch: Move mega-flow list out of rehashing struct. ovs-flow rehash does not touch mega flow list. Following patch moves it dp struct datapath. Avoid one extra indirection for accessing mega-flow list head on every packet receive. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 77 ++++------------ net/openvswitch/datapath.h | 6 +- net/openvswitch/flow_table.c | 205 ++++++++++++++++++++++++++----------------- net/openvswitch/flow_table.h | 32 +++---- 4 files changed, 155 insertions(+), 165 deletions(-) (limited to 'net/openvswitch/flow_table.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 72e68743c643..60b9be3b9477 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -59,8 +59,6 @@ #include "vport-internal_dev.h" #include "vport-netdev.h" -#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) - int ovs_net_id __read_mostly; static void ovs_notify(struct sk_buff *skb, struct genl_info *info, @@ -163,7 +161,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); - ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false); + ovs_flow_tbl_destroy(&dp->table, false); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -235,7 +233,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. */ - flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -453,23 +451,6 @@ out: return err; } -/* Called with ovs_mutex. */ -static int flush_flows(struct datapath *dp) -{ - struct flow_table *old_table; - struct flow_table *new_table; - - old_table = ovsl_dereference(dp->table); - new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); - if (!new_table) - return -ENOMEM; - - rcu_assign_pointer(dp->table, new_table); - - ovs_flow_tbl_destroy(old_table, true); - return 0; -} - static void clear_stats(struct sw_flow *flow) { flow->used = 0; @@ -584,11 +565,9 @@ static struct genl_ops dp_packet_genl_ops[] = { static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) { - struct flow_table *table; int i; - table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held()); - stats->n_flows = ovs_flow_tbl_count(table); + stats->n_flows = ovs_flow_tbl_count(&dp->table); stats->n_hit = stats->n_missed = stats->n_lost = 0; for_each_possible_cpu(i) { @@ -773,7 +752,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct flow_table *table; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; int error; @@ -814,12 +792,9 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!dp) goto err_unlock_ovs; - table = ovsl_dereference(dp->table); - /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { - struct flow_table *new_table = NULL; struct sw_flow_mask *mask_p; /* Bail out if we're not allowed to create a new flow. */ @@ -827,19 +802,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) goto err_unlock_ovs; - /* Expand table, if necessary, to make room. */ - if (ovs_flow_tbl_need_to_expand(table)) - new_table = ovs_flow_tbl_expand(table); - else if (time_after(jiffies, dp->last_rehash + REHASH_FLOW_INTERVAL)) - new_table = ovs_flow_tbl_rehash(table); - - if (new_table && !IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_destroy(table, true); - table = ovsl_dereference(dp->table); - dp->last_rehash = jiffies; - } - /* Allocate flow. */ flow = ovs_flow_alloc(); if (IS_ERR(flow)) { @@ -852,7 +814,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->unmasked_key = key; /* Make sure mask is unique in the system */ - mask_p = ovs_sw_flow_mask_find(table, &mask); + mask_p = ovs_sw_flow_mask_find(&dp->table, &mask); if (!mask_p) { /* Allocate a new mask if none exsits. */ mask_p = ovs_sw_flow_mask_alloc(); @@ -860,7 +822,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; mask_p->key = mask.key; mask_p->range = mask.range; - ovs_sw_flow_mask_insert(table, mask_p); + ovs_sw_flow_mask_insert(&dp->table, mask_p); } ovs_sw_flow_mask_add_ref(mask_p); @@ -868,7 +830,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - ovs_flow_tbl_insert(table, flow); + ovs_flow_tbl_insert(&dp->table, flow); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -936,7 +898,6 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -957,8 +918,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - table = ovsl_dereference(dp->table); - flow = ovs_flow_tbl_lookup(table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; @@ -986,7 +946,6 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -998,7 +957,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } if (!a[OVS_FLOW_ATTR_KEY]) { - err = flush_flows(dp); + err = ovs_flow_tbl_flush(&dp->table); goto unlock; } @@ -1007,8 +966,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock; - table = ovsl_dereference(dp->table); - flow = ovs_flow_tbl_lookup(table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; @@ -1020,7 +978,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - ovs_flow_tbl_remove(table, flow); + ovs_flow_tbl_remove(&dp->table, flow); err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); @@ -1039,8 +997,8 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct table_instance *ti; struct datapath *dp; - struct flow_table *table; rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1049,14 +1007,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENODEV; } - table = rcu_dereference(dp->table); + ti = rcu_dereference(dp->table.ti); for (;;) { struct sw_flow *flow; u32 bucket, obj; bucket = cb->args[0]; obj = cb->args[1]; - flow = ovs_flow_tbl_dump_next(table, &bucket, &obj); + flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); if (!flow) break; @@ -1220,9 +1178,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); /* Allocate table. */ - err = -ENOMEM; - rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); - if (!dp->table) + err = ovs_flow_tbl_init(&dp->table); + if (err) goto err_free_dp; dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); @@ -1279,7 +1236,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false); + ovs_flow_tbl_destroy(&dp->table, false); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index a6982ef84f20..acfd4af8ca3a 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -58,12 +58,11 @@ struct dp_stats_percpu { * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. - * @table: Current flow table. Protected by ovs_mutex and RCU. + * @table: flow table. * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. - * @last_rehash: Timestamp of last rehash. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -73,7 +72,7 @@ struct datapath { struct list_head list_node; /* Flow table. */ - struct flow_table __rcu *table; + struct flow_table table; /* Switch ports. */ struct hlist_head *ports; @@ -85,7 +84,6 @@ struct datapath { /* Network namespace ref. */ struct net *net; #endif - unsigned long last_rehash; }; /** diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index dcadb75bb173..1c7e7732ed4c 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -44,6 +44,11 @@ #include #include +#include "datapath.h" + +#define TBL_MIN_BUCKETS 1024 +#define REHASH_INTERVAL (10 * 60 * HZ) + static struct kmem_cache *flow_cache; static u16 range_n_bytes(const struct sw_flow_key_range *range) @@ -82,6 +87,11 @@ struct sw_flow *ovs_flow_alloc(void) return flow; } +int ovs_flow_tbl_count(struct flow_table *table) +{ + return table->count; +} + static struct flex_array *alloc_buckets(unsigned int n_buckets) { struct flex_array *buckets; @@ -136,18 +146,18 @@ static void free_buckets(struct flex_array *buckets) flex_array_free(buckets); } -static void __flow_tbl_destroy(struct flow_table *table) +static void __table_instance_destroy(struct table_instance *ti) { int i; - if (table->keep_flows) + if (ti->keep_flows) goto skip_flows; - for (i = 0; i < table->n_buckets; i++) { + for (i = 0; i < ti->n_buckets; i++) { struct sw_flow *flow; - struct hlist_head *head = flex_array_get(table->buckets, i); + struct hlist_head *head = flex_array_get(ti->buckets, i); struct hlist_node *n; - int ver = table->node_ver; + int ver = ti->node_ver; hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { hlist_del(&flow->hash_node[ver]); @@ -155,74 +165,74 @@ static void __flow_tbl_destroy(struct flow_table *table) } } - BUG_ON(!list_empty(table->mask_list)); - kfree(table->mask_list); - skip_flows: - free_buckets(table->buckets); - kfree(table); + free_buckets(ti->buckets); + kfree(ti); } -static struct flow_table *__flow_tbl_alloc(int new_size) +static struct table_instance *table_instance_alloc(int new_size) { - struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); + struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL); - if (!table) + if (!ti) return NULL; - table->buckets = alloc_buckets(new_size); + ti->buckets = alloc_buckets(new_size); - if (!table->buckets) { - kfree(table); + if (!ti->buckets) { + kfree(ti); return NULL; } - table->n_buckets = new_size; - table->count = 0; - table->node_ver = 0; - table->keep_flows = false; - get_random_bytes(&table->hash_seed, sizeof(u32)); - table->mask_list = NULL; + ti->n_buckets = new_size; + ti->node_ver = 0; + ti->keep_flows = false; + get_random_bytes(&ti->hash_seed, sizeof(u32)); - return table; + return ti; } -struct flow_table *ovs_flow_tbl_alloc(int new_size) +int ovs_flow_tbl_init(struct flow_table *table) { - struct flow_table *table = __flow_tbl_alloc(new_size); + struct table_instance *ti; - if (!table) - return NULL; + ti = table_instance_alloc(TBL_MIN_BUCKETS); - table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); - if (!table->mask_list) { - table->keep_flows = true; - __flow_tbl_destroy(table); - return NULL; - } - INIT_LIST_HEAD(table->mask_list); + if (!ti) + return -ENOMEM; - return table; + rcu_assign_pointer(table->ti, ti); + INIT_LIST_HEAD(&table->mask_list); + table->last_rehash = jiffies; + table->count = 0; + return 0; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) { - struct flow_table *table = container_of(rcu, struct flow_table, rcu); + struct table_instance *ti = container_of(rcu, struct table_instance, rcu); - __flow_tbl_destroy(table); + __table_instance_destroy(ti); } -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) +static void table_instance_destroy(struct table_instance *ti, bool deferred) { - if (!table) + if (!ti) return; if (deferred) - call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); else - __flow_tbl_destroy(table); + __table_instance_destroy(ti); +} + +void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) +{ + struct table_instance *ti = ovsl_dereference(table->ti); + + table_instance_destroy(ti, deferred); } -struct sw_flow *ovs_flow_tbl_dump_next(struct flow_table *table, +struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, u32 *bucket, u32 *last) { struct sw_flow *flow; @@ -230,10 +240,10 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct flow_table *table, int ver; int i; - ver = table->node_ver; - while (*bucket < table->n_buckets) { + ver = ti->node_ver; + while (*bucket < ti->n_buckets) { i = 0; - head = flex_array_get(table->buckets, *bucket); + head = flex_array_get(ti->buckets, *bucket); hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { if (i < *last) { i++; @@ -249,25 +259,23 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct flow_table *table, return NULL; } -static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) +static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) { - hash = jhash_1word(hash, table->hash_seed); - return flex_array_get(table->buckets, - (hash & (table->n_buckets - 1))); + hash = jhash_1word(hash, ti->hash_seed); + return flex_array_get(ti->buckets, + (hash & (ti->n_buckets - 1))); } -static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) +static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) { struct hlist_head *head; - head = find_bucket(table, flow->hash); - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); - - table->count++; + head = find_bucket(ti, flow->hash); + hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); } -static void flow_table_copy_flows(struct flow_table *old, - struct flow_table *new) +static void flow_table_copy_flows(struct table_instance *old, + struct table_instance *new) { int old_ver; int i; @@ -283,35 +291,42 @@ static void flow_table_copy_flows(struct flow_table *old, head = flex_array_get(old->buckets, i); hlist_for_each_entry(flow, head, hash_node[old_ver]) - __tbl_insert(new, flow); + table_instance_insert(new, flow); } - new->mask_list = old->mask_list; old->keep_flows = true; } -static struct flow_table *__flow_tbl_rehash(struct flow_table *table, +static struct table_instance *table_instance_rehash(struct table_instance *ti, int n_buckets) { - struct flow_table *new_table; + struct table_instance *new_ti; - new_table = __flow_tbl_alloc(n_buckets); - if (!new_table) + new_ti = table_instance_alloc(n_buckets); + if (!new_ti) return ERR_PTR(-ENOMEM); - flow_table_copy_flows(table, new_table); + flow_table_copy_flows(ti, new_ti); - return new_table; + return new_ti; } -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) +int ovs_flow_tbl_flush(struct flow_table *flow_table) { - return __flow_tbl_rehash(table, table->n_buckets); -} + struct table_instance *old_ti; + struct table_instance *new_ti; -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets * 2); + old_ti = ovsl_dereference(flow_table->ti); + new_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ti) + return -ENOMEM; + + rcu_assign_pointer(flow_table->ti, new_ti); + flow_table->last_rehash = jiffies; + flow_table->count = 0; + + table_instance_destroy(old_ti, true); + return 0; } static u32 flow_hash(const struct sw_flow_key *key, int key_start, @@ -367,7 +382,7 @@ bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, return cmp_key(&flow->unmasked_key, key, key_start, key_end); } -static struct sw_flow *masked_flow_lookup(struct flow_table *table, +static struct sw_flow *masked_flow_lookup(struct table_instance *ti, const struct sw_flow_key *unmasked, struct sw_flow_mask *mask) { @@ -380,8 +395,8 @@ static struct sw_flow *masked_flow_lookup(struct flow_table *table, ovs_flow_mask_key(&masked_key, unmasked, mask); hash = flow_hash(&masked_key, key_start, key_end); - head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { if (flow->mask == mask && flow_cmp_masked_key(flow, &masked_key, key_start, key_end)) @@ -393,29 +408,55 @@ static struct sw_flow *masked_flow_lookup(struct flow_table *table, struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, const struct sw_flow_key *key) { - struct sw_flow *flow = NULL; + struct table_instance *ti = rcu_dereference(tbl->ti); struct sw_flow_mask *mask; + struct sw_flow *flow; - list_for_each_entry_rcu(mask, tbl->mask_list, list) { - flow = masked_flow_lookup(tbl, key, mask); + list_for_each_entry_rcu(mask, &tbl->mask_list, list) { + flow = masked_flow_lookup(ti, key, mask); if (flow) /* Found */ - break; + return flow; } + return NULL; +} - return flow; +static struct table_instance *table_instance_expand(struct table_instance *ti) +{ + return table_instance_rehash(ti, ti->n_buckets * 2); } void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) { + struct table_instance *ti = NULL; + struct table_instance *new_ti = NULL; + + ti = ovsl_dereference(table->ti); + + /* Expand table, if necessary, to make room. */ + if (table->count > ti->n_buckets) + new_ti = table_instance_expand(ti); + else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) + new_ti = table_instance_rehash(ti, ti->n_buckets); + + if (new_ti && !IS_ERR(new_ti)) { + rcu_assign_pointer(table->ti, new_ti); + ovs_flow_tbl_destroy(table, true); + ti = ovsl_dereference(table->ti); + table->last_rehash = jiffies; + } + flow->hash = flow_hash(&flow->key, flow->mask->range.start, flow->mask->range.end); - __tbl_insert(table, flow); + table_instance_insert(ti, flow); + table->count++; } void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { + struct table_instance *ti = ovsl_dereference(table->ti); + BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[table->node_ver]); + hlist_del_rcu(&flow->hash_node[ti->node_ver]); table->count--; } @@ -475,7 +516,7 @@ struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, { struct list_head *ml; - list_for_each(ml, tbl->mask_list) { + list_for_each(ml, &tbl->mask_list) { struct sw_flow_mask *m; m = container_of(ml, struct sw_flow_mask, list); if (mask_equal(mask, m)) @@ -492,7 +533,7 @@ struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, */ void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) { - list_add_rcu(&mask->list, tbl->mask_list); + list_add_rcu(&mask->list, &tbl->mask_list); } /* Initializes the flow module. diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index d7a114457cde..5d1abe566c46 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -36,42 +36,36 @@ #include "flow.h" -#define TBL_MIN_BUCKETS 1024 - -struct flow_table { +struct table_instance { struct flex_array *buckets; - unsigned int count, n_buckets; + unsigned int n_buckets; struct rcu_head rcu; - struct list_head *mask_list; int node_ver; u32 hash_seed; bool keep_flows; }; +struct flow_table { + struct table_instance __rcu *ti; + struct list_head mask_list; + unsigned long last_rehash; + unsigned int count; +}; + int ovs_flow_init(void); void ovs_flow_exit(void); struct sw_flow *ovs_flow_alloc(void); void ovs_flow_free(struct sw_flow *, bool deferred); -static inline int ovs_flow_tbl_count(struct flow_table *table) -{ - return table->count; -} - -static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) -{ - return (table->count > table->n_buckets); -} - -struct flow_table *ovs_flow_tbl_alloc(int new_size); -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); +int ovs_flow_tbl_init(struct flow_table *); +int ovs_flow_tbl_count(struct flow_table *table); void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); +int ovs_flow_tbl_flush(struct flow_table *flow_table); void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); -struct sw_flow *ovs_flow_tbl_dump_next(struct flow_table *table, +struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); -- cgit v1.2.3 From 618ed0c805b64c820279f50732110ab873221c3b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 4 Oct 2013 00:17:42 -0700 Subject: openvswitch: Simplify mega-flow APIs. Hides mega-flow implementation in flow_table.c rather than datapath.c. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 27 +++------ net/openvswitch/flow_table.c | 138 +++++++++++++++++++++++++------------------ net/openvswitch/flow_table.h | 12 +--- 3 files changed, 89 insertions(+), 88 deletions(-) (limited to 'net/openvswitch/flow_table.c') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 60b9be3b9477..cf270973095d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -161,7 +161,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); - ovs_flow_tbl_destroy(&dp->table, false); + ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -795,8 +795,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Check if this is a duplicate flow */ flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { - struct sw_flow_mask *mask_p; - /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) @@ -812,25 +810,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->key = masked_key; flow->unmasked_key = key; - - /* Make sure mask is unique in the system */ - mask_p = ovs_sw_flow_mask_find(&dp->table, &mask); - if (!mask_p) { - /* Allocate a new mask if none exsits. */ - mask_p = ovs_sw_flow_mask_alloc(); - if (!mask_p) - goto err_flow_free; - mask_p->key = mask.key; - mask_p->range = mask.range; - ovs_sw_flow_mask_insert(&dp->table, mask_p); - } - - ovs_sw_flow_mask_add_ref(mask_p); - flow->mask = mask_p; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - ovs_flow_tbl_insert(&dp->table, flow); + error = ovs_flow_tbl_insert(&dp->table, flow, &mask); + if (error) { + acts = NULL; + goto err_flow_free; + } reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1236,7 +1223,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(&dp->table, false); + ovs_flow_tbl_destroy(&dp->table); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 1c7e7732ed4c..036e019f8c3c 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -128,12 +128,36 @@ static void rcu_free_flow_callback(struct rcu_head *rcu) flow_free(flow); } +static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) +{ + struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); + + kfree(mask); +} + +static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) +{ + if (!mask) + return; + + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + if (deferred) + call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); + else + kfree(mask); + } +} + void ovs_flow_free(struct sw_flow *flow, bool deferred) { if (!flow) return; - ovs_sw_flow_mask_del_ref(flow->mask, deferred); + flow_mask_del_ref(flow->mask, deferred); if (deferred) call_rcu(&flow->rcu, rcu_free_flow_callback); @@ -225,11 +249,11 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred) __table_instance_destroy(ti); } -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) +void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = ovsl_dereference(table->ti); - table_instance_destroy(ti, deferred); + table_instance_destroy(ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -304,7 +328,7 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti, new_ti = table_instance_alloc(n_buckets); if (!new_ti) - return ERR_PTR(-ENOMEM); + return NULL; flow_table_copy_flows(ti, new_ti); @@ -425,32 +449,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti) return table_instance_rehash(ti, ti->n_buckets * 2); } -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) -{ - struct table_instance *ti = NULL; - struct table_instance *new_ti = NULL; - - ti = ovsl_dereference(table->ti); - - /* Expand table, if necessary, to make room. */ - if (table->count > ti->n_buckets) - new_ti = table_instance_expand(ti); - else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) - new_ti = table_instance_rehash(ti, ti->n_buckets); - - if (new_ti && !IS_ERR(new_ti)) { - rcu_assign_pointer(table->ti, new_ti); - ovs_flow_tbl_destroy(table, true); - ti = ovsl_dereference(table->ti); - table->last_rehash = jiffies; - } - - flow->hash = flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); - table_instance_insert(ti, flow); - table->count++; -} - void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); @@ -460,7 +458,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) table->count--; } -struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) +static struct sw_flow_mask *mask_alloc(void) { struct sw_flow_mask *mask; @@ -471,35 +469,11 @@ struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) return mask; } -void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) +static void mask_add_ref(struct sw_flow_mask *mask) { mask->ref_count++; } -static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) -{ - struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); - - kfree(mask); -} - -void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) -{ - if (!mask) - return; - - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); - else - kfree(mask); - } -} - static bool mask_equal(const struct sw_flow_mask *a, const struct sw_flow_mask *b) { @@ -511,7 +485,7 @@ static bool mask_equal(const struct sw_flow_mask *a, && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); } -struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, +static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, const struct sw_flow_mask *mask) { struct list_head *ml; @@ -531,9 +505,55 @@ struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, * The caller needs to make sure that 'mask' is not the same * as any masks that are already on the list. */ -void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) +static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, + struct sw_flow_mask *new) +{ + struct sw_flow_mask *mask; + mask = flow_mask_find(tbl, new); + if (!mask) { + /* Allocate a new mask if none exsits. */ + mask = mask_alloc(); + if (!mask) + return -ENOMEM; + mask->key = new->key; + mask->range = new->range; + list_add_rcu(&mask->list, &tbl->mask_list); + } + + mask_add_ref(mask); + flow->mask = mask; + return 0; +} + +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_mask *mask) { - list_add_rcu(&mask->list, &tbl->mask_list); + struct table_instance *new_ti = NULL; + struct table_instance *ti; + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + + flow->hash = flow_hash(&flow->key, flow->mask->range.start, + flow->mask->range.end); + ti = ovsl_dereference(table->ti); + table_instance_insert(ti, flow); + table->count++; + + /* Expand table, if necessary, to make room. */ + if (table->count > ti->n_buckets) + new_ti = table_instance_expand(ti); + else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) + new_ti = table_instance_rehash(ti, ti->n_buckets); + + if (new_ti) { + rcu_assign_pointer(table->ti, new_ti); + table_instance_destroy(ti, true); + table->last_rehash = jiffies; + } + return 0; } /* Initializes the flow module. diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 5d1abe566c46..4db5f78b6f81 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -60,10 +60,11 @@ void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); int ovs_flow_tbl_count(struct flow_table *table); -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); +void ovs_flow_tbl_destroy(struct flow_table *table); int ovs_flow_tbl_flush(struct flow_table *flow_table); -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_mask *mask); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); @@ -73,13 +74,6 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); -struct sw_flow_mask *ovs_sw_flow_mask_alloc(void); -void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *); -void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred); -void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *); -struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *, - const struct sw_flow_mask *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); - #endif /* flow_table.h */ -- cgit v1.2.3 From 1bd7116f1cb833c998cddb6b188df463342069d8 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Tue, 22 Oct 2013 10:42:46 -0700 Subject: openvswitch: collect mega flow mask stats Collect mega flow mask stats. ovs-dpctl show command can be used to display them for debugging and performance tuning. Signed-off-by: Andy Zhou Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 17 ++++++++++++++--- net/openvswitch/datapath.c | 38 +++++++++++++++++++++++++++++++------- net/openvswitch/datapath.h | 4 ++++ net/openvswitch/flow_table.c | 16 +++++++++++++++- net/openvswitch/flow_table.h | 4 +++- 5 files changed, 67 insertions(+), 12 deletions(-) (limited to 'net/openvswitch/flow_table.c') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index a74d375b439b..2cc4644f68ef 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -63,15 +63,18 @@ enum ovs_datapath_cmd { * not be sent. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. + * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the + * datapath. Always present in notifications. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_DP_* commands. */ enum ovs_datapath_attr { OVS_DP_ATTR_UNSPEC, - OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ - OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ - OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ + OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ + OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ + OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ __OVS_DP_ATTR_MAX }; @@ -84,6 +87,14 @@ struct ovs_dp_stats { __u64 n_flows; /* Number of flows present */ }; +struct ovs_dp_megaflow_stats { + __u64 n_mask_hit; /* Number of masks used for flow lookups. */ + __u32 n_masks; /* Number of masks for the datapath. */ + __u32 pad0; /* Pad for future expension. */ + __u64 pad1; /* Pad for future expension. */ + __u64 pad2; /* Pad for future expension. */ +}; + struct ovs_vport_stats { __u64 rx_packets; /* total packets received */ __u64 tx_packets; /* total packets transmitted */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index cf270973095d..5bc5a4e64758 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -221,6 +221,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) struct dp_stats_percpu *stats; struct sw_flow_key key; u64 *stats_counter; + u32 n_mask_hit; int error; stats = this_cpu_ptr(dp->stats_percpu); @@ -233,7 +234,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -258,6 +259,7 @@ out: /* Update datapath statistics. */ u64_stats_update_begin(&stats->sync); (*stats_counter)++; + stats->n_mask_hit += n_mask_hit; u64_stats_update_end(&stats->sync); } @@ -563,13 +565,18 @@ static struct genl_ops dp_packet_genl_ops[] = { } }; -static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) +static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, + struct ovs_dp_megaflow_stats *mega_stats) { int i; + memset(mega_stats, 0, sizeof(*mega_stats)); + stats->n_flows = ovs_flow_tbl_count(&dp->table); + mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table); stats->n_hit = stats->n_missed = stats->n_lost = 0; + for_each_possible_cpu(i) { const struct dp_stats_percpu *percpu_stats; struct dp_stats_percpu local_stats; @@ -585,6 +592,7 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; + mega_stats->n_mask_hit += local_stats.n_mask_hit; } } @@ -743,6 +751,14 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, return skb; } +static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + u32 __always_unused n_mask_hit; + + return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); +} + static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -793,7 +809,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = __ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; @@ -905,7 +921,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = __ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; @@ -953,7 +969,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock; - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = __ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; @@ -1067,6 +1083,7 @@ static size_t ovs_dp_cmd_msg_size(void) msgsize += nla_total_size(IFNAMSIZ); msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); + msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats)); return msgsize; } @@ -1076,6 +1093,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; + struct ovs_dp_megaflow_stats dp_megaflow_stats; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, @@ -1091,8 +1109,14 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (err) goto nla_put_failure; - get_dp_stats(dp, &dp_stats); - if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats)) + get_dp_stats(dp, &dp_stats, &dp_megaflow_stats); + if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), + &dp_stats)) + goto nla_put_failure; + + if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS, + sizeof(struct ovs_dp_megaflow_stats), + &dp_megaflow_stats)) goto nla_put_failure; return genlmsg_end(skb, ovs_header); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index acfd4af8ca3a..d3d14a58aa91 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -46,11 +46,15 @@ * @n_lost: Number of received packets that had no matching flow in the flow * table that could not be sent to userspace (normally due to an overflow in * one of the datapath's queues). + * @n_mask_hit: Number of masks looked up for flow match. + * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked + * up per packet. */ struct dp_stats_percpu { u64 n_hit; u64 n_missed; u64 n_lost; + u64 n_mask_hit; struct u64_stats_sync sync; }; diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 036e019f8c3c..536b4d2a42e2 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -430,13 +430,16 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, } struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, - const struct sw_flow_key *key) + const struct sw_flow_key *key, + u32 *n_mask_hit) { struct table_instance *ti = rcu_dereference(tbl->ti); struct sw_flow_mask *mask; struct sw_flow *flow; + *n_mask_hit = 0; list_for_each_entry_rcu(mask, &tbl->mask_list, list) { + (*n_mask_hit)++; flow = masked_flow_lookup(ti, key, mask); if (flow) /* Found */ return flow; @@ -444,6 +447,17 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return NULL; } +int ovs_flow_tbl_num_masks(const struct flow_table *table) +{ + struct sw_flow_mask *mask; + int num = 0; + + list_for_each_entry(mask, &table->mask_list, list) + num++; + + return num; +} + static struct table_instance *table_instance_expand(struct table_instance *ti) { return table_instance_rehash(ti, ti->n_buckets * 2); diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 4db5f78b6f81..fbe45d5ad07d 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -66,10 +66,12 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table); int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, struct sw_flow_mask *mask); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); +int ovs_flow_tbl_num_masks(const struct flow_table *table); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, - const struct sw_flow_key *); + const struct sw_flow_key *, + u32 *n_mask_hit); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); -- cgit v1.2.3 From 8ddd094675cfd453fc9838caa46ea108a4107183 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 29 Oct 2013 23:10:58 -0700 Subject: openvswitch: Use flow hash during flow lookup operation. Flow->hash can be used to detect hash collisions and avoid flow key compare in flow lookup. Signed-off-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/flow_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/openvswitch/flow_table.c') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 536b4d2a42e2..e42542706087 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -421,7 +421,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, hash = flow_hash(&masked_key, key_start, key_end); head = find_bucket(ti, hash); hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { - if (flow->mask == mask && + if (flow->mask == mask && flow->hash == hash && flow_cmp_masked_key(flow, &masked_key, key_start, key_end)) return flow; -- cgit v1.2.3