Diffstat (limited to 'net/sched')
58 files changed, 3623 insertions, 2035 deletions
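The dominant change in this series is net/sched/act_api.c dropping its homegrown tcf_hashinfo hash table in favor of a kernel IDR keyed by action index. The sketch below condenses the two-phase pattern visible in the tcf_idr_create() and tcf_idr_insert() hunks that follow: the index is first reserved with a NULL placeholder, so concurrent lookups never see a half-initialized action, and the real pointer is only published once setup completes. Struct layouts are abridged and the helper names (action_index_alloc, action_publish) are illustrative, not part of the patch.

#include <linux/idr.h>
#include <net/act_api.h>

/* Illustrative helpers condensing tcf_idr_create()/tcf_idr_insert();
 * not part of the patch itself. */
static int action_index_alloc(struct tcf_idrinfo *idrinfo,
			      struct tc_action *p, u32 index)
{
	struct idr *idr = &idrinfo->action_idr;
	unsigned long idr_index;
	int err;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&idrinfo->lock);
	if (!index) {
		/* kernel-chosen index, starting at 1; the NULL placeholder
		 * keeps tcf_idr_lookup() from returning a half-built action
		 */
		err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0, GFP_ATOMIC);
		if (!err)
			p->tcfa_index = idr_index;
	} else {
		/* user-chosen index: reserve exactly [index, index + 1) */
		err = idr_alloc_ext(idr, NULL, NULL, index, index + 1,
				    GFP_ATOMIC);
		if (!err)
			p->tcfa_index = index;
	}
	spin_unlock_bh(&idrinfo->lock);
	idr_preload_end();
	return err;
}

/* Publish the fully initialized action, replacing the placeholder. */
static void action_publish(struct tcf_idrinfo *idrinfo, struct tc_action *p)
{
	spin_lock_bh(&idrinfo->lock);
	idr_replace_ext(&idrinfo->action_idr, p, p->tcfa_index);
	spin_unlock_bh(&idrinfo->lock);
}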
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e70ed26485a2..c03d86a7775e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -172,6 +172,17 @@ config NET_SCH_TBF To compile this code as a module, choose M here: the module will be called sch_tbf. +config NET_SCH_CBS + tristate "Credit Based Shaper (CBS)" + ---help--- + Say Y here if you want to use the Credit Based Shaper (CBS) packet + scheduling algorithm. + + See the top of <file:net/sched/sch_cbs.c> for more details. + + To compile this code as a module, choose M here: the + module will be called sch_cbs. + config NET_SCH_GRED tristate "Generic Random Early Detection (GRED)" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 7b915d226de7..5b635447e3f8 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux Traffic Control Unit. # @@ -52,6 +53,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o +obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f2e9ed34a963..4d33a50a8a6d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -21,6 +21,8 @@ #include <linux/kmod.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/rhashtable.h> +#include <linux/list.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/sch_generic.h> @@ -53,10 +55,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a, res->goto_tp = rcu_dereference_bh(chain->filter_chain); } -static void free_tcf(struct rcu_head *head) +/* XXX: For standalone actions, we don't need a RCU grace period either, because + * actions are always connected to filters and filters are already destroyed in + * RCU callbacks, so after a RCU grace period actions are already disconnected + * from filters. Readers later can not find us. 
+ */ +static void free_tcf(struct tc_action *p) { - struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu); - free_percpu(p->cpu_bstats); free_percpu(p->cpu_qstats); @@ -70,23 +75,21 @@ static void free_tcf(struct rcu_head *head) kfree(p); } -static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p) +static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) { - spin_lock_bh(&hinfo->lock); - hlist_del(&p->tcfa_head); - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); + spin_unlock_bh(&idrinfo->lock); gen_kill_estimator(&p->tcfa_rate_est); - /* - * gen_estimator est_timer() might access p->tcfa_lock - * or bstats, wait a RCU grace period before freeing p - */ - call_rcu(&p->tcfa_rcu, free_tcf); + free_tcf(p); } -int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) +int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; + ASSERT_RTNL(); + if (p) { if (bind) p->tcfa_bindcnt--; @@ -97,55 +100,64 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) { if (p->ops->cleanup) p->ops->cleanup(p, bind); - tcf_hash_destroy(p->hinfo, p); + tcf_idr_remove(p->idrinfo, p); ret = ACT_P_DELETED; } } return ret; } -EXPORT_SYMBOL(__tcf_hash_release); +EXPORT_SYMBOL(__tcf_idr_release); -static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, +static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { - int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; + int err = 0, index = -1, s_i = 0, n_i = 0; + u32 act_flags = cb->args[2]; + unsigned long jiffy_since = cb->args[3]; struct nlattr *nest; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + unsigned long id = 1; - spin_lock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); s_i = cb->args[0]; - for (i = 0; i < (hinfo->hmask + 1); i++) { - struct hlist_head *head; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - - hlist_for_each_entry_rcu(p, head, tcfa_head) { - index++; - if (index < s_i) - continue; - - nest = nla_nest_start(skb, n_i); - if (nest == NULL) - goto nla_put_failure; - err = tcf_action_dump_1(skb, p, 0, 0); - if (err < 0) { - index--; - nlmsg_trim(skb, nest); - goto done; - } - nla_nest_end(skb, nest); - n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) - goto done; + idr_for_each_entry_ext(idr, p, id) { + index++; + if (index < s_i) + continue; + + if (jiffy_since && + time_after(jiffy_since, + (unsigned long)p->tcfa_tm.lastuse)) + continue; + + nest = nla_nest_start(skb, n_i); + if (!nest) + goto nla_put_failure; + err = tcf_action_dump_1(skb, p, 0, 0); + if (err < 0) { + index--; + nlmsg_trim(skb, nest); + goto done; } + nla_nest_end(skb, nest); + n_i++; + if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && + n_i >= TCA_ACT_MAX_PRIO) + goto done; } done: - spin_unlock_bh(&hinfo->lock); - if (n_i) - cb->args[0] += n_i; + if (index >= 0) + cb->args[0] = index + 1; + + spin_unlock_bh(&idrinfo->lock); + if (n_i) { + if (act_flags & TCA_FLAG_LARGE_DUMP_ON) + cb->args[1] = n_i; + } return n_i; nla_put_failure: @@ -153,31 +165,29 @@ nla_put_failure: goto done; } -static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, +static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, const struct tc_action_ops *ops) { struct nlattr *nest; - int i = 0, n_i = 0; + int n_i = 0; int ret = -EINVAL; + struct idr 
*idr = &idrinfo->action_idr; + struct tc_action *p; + unsigned long id = 1; nest = nla_nest_start(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; - for (i = 0; i < (hinfo->hmask + 1); i++) { - struct hlist_head *head; - struct hlist_node *n; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - hlist_for_each_entry_safe(p, n, head, tcfa_head) { - ret = __tcf_hash_release(p, false, true); - if (ret == ACT_P_DELETED) { - module_put(p->ops->owner); - n_i++; - } else if (ret < 0) - goto nla_put_failure; + + idr_for_each_entry_ext(idr, p, id) { + ret = __tcf_idr_release(p, false, true); + if (ret == ACT_P_DELETED) { + module_put(ops->owner); + n_i++; + } else if (ret < 0) { + goto nla_put_failure; } } if (nla_put_u32(skb, TCA_FCNT, n_i)) @@ -194,12 +204,12 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops) { - struct tcf_hashinfo *hinfo = tn->hinfo; + struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(hinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops); } else if (type == RTM_GETACTION) { - return tcf_dump_walker(hinfo, skb, cb); + return tcf_dump_walker(idrinfo, skb, cb); } else { WARN(1, "tcf_generic_walker: unknown action %d\n", type); return -EINVAL; @@ -207,40 +217,21 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, } EXPORT_SYMBOL(tcf_generic_walker); -static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) +static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo) { struct tc_action *p = NULL; - struct hlist_head *head; - spin_lock_bh(&hinfo->lock); - head = &hinfo->htab[tcf_hash(index, hinfo->hmask)]; - hlist_for_each_entry_rcu(p, head, tcfa_head) - if (p->tcfa_index == index) - break; - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + p = idr_find_ext(&idrinfo->action_idr, index); + spin_unlock_bh(&idrinfo->lock); return p; } -u32 tcf_hash_new_index(struct tc_action_net *tn) -{ - struct tcf_hashinfo *hinfo = tn->hinfo; - u32 val = hinfo->index; - - do { - if (++val == 0) - val = 1; - } while (tcf_hash_lookup(val, hinfo)); - - hinfo->index = val; - return val; -} -EXPORT_SYMBOL(tcf_hash_new_index); - -int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index) +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { - struct tcf_hashinfo *hinfo = tn->hinfo; - struct tc_action *p = tcf_hash_lookup(index, hinfo); + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct tc_action *p = tcf_idr_lookup(index, idrinfo); if (p) { *a = p; @@ -248,15 +239,15 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index) } return 0; } -EXPORT_SYMBOL(tcf_hash_search); +EXPORT_SYMBOL(tcf_idr_search); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind) +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, + int bind) { - struct tcf_hashinfo *hinfo = tn->hinfo; - struct tc_action *p = NULL; + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct tc_action *p = tcf_idr_lookup(index, idrinfo); - if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { + if (index && p) { if (bind) p->tcfa_bindcnt++; p->tcfa_refcnt++; @@ -265,23 +256,25 @@ bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, } return false; } 
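From here on, every action module (act_bpf, act_connmark, act_csum, and so on) is mechanically converted to the same consumer pattern against the renamed helpers. A condensed, illustrative version follows; error handling and the module-specific configuration step are elided, and my_act_init/my_act_ops are placeholder names, not symbols from the patch.

#include <net/act_api.h>

static struct tc_action_ops my_act_ops;	/* placeholder ops table */

/* Condensed init flow repeated in each act_*.c conversion below. */
static int my_act_init(struct tc_action_net *tn, u32 index,
		       struct nlattr *est, struct tc_action **a,
		       int ovr, int bind)
{
	int ret;

	if (!tcf_idr_check(tn, index, a, bind)) {
		/* no action at this index yet: create a placeholder entry */
		ret = tcf_idr_create(tn, index, est, a, &my_act_ops,
				     bind, false);
		if (ret)
			return ret;
		ret = ACT_P_CREATED;
	} else {
		if (bind)		/* bound by a filter: nothing to do */
			return 0;
		tcf_idr_release(*a, bind);
		if (!ovr)		/* exists, and no replace requested */
			return -EEXIST;
		ret = 0;
	}

	/* ... parse netlink attributes into the action's private part ... */

	if (ret == ACT_P_CREATED)
		tcf_idr_insert(tn, *a);	/* make it visible to lookups */
	return ret;
}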
-EXPORT_SYMBOL(tcf_hash_check); +EXPORT_SYMBOL(tcf_idr_check); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est) { if (est) gen_kill_estimator(&a->tcfa_rate_est); - call_rcu(&a->tcfa_rcu, free_tcf); + free_tcf(a); } -EXPORT_SYMBOL(tcf_hash_cleanup); +EXPORT_SYMBOL(tcf_idr_cleanup); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, - int bind, bool cpustats) +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats) { struct tc_action *p = kzalloc(ops->size, GFP_KERNEL); - struct tcf_hashinfo *hinfo = tn->hinfo; + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct idr *idr = &idrinfo->action_idr; int err = -ENOMEM; + unsigned long idr_index; if (unlikely(!p)) return -ENOMEM; @@ -304,8 +297,32 @@ err2: } } spin_lock_init(&p->tcfa_lock); - INIT_HLIST_NODE(&p->tcfa_head); - p->tcfa_index = index ? index : tcf_hash_new_index(tn); + /* user doesn't specify an index */ + if (!index) { + idr_preload(GFP_KERNEL); + spin_lock_bh(&idrinfo->lock); + err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0, + GFP_ATOMIC); + spin_unlock_bh(&idrinfo->lock); + idr_preload_end(); + if (err) { +err3: + free_percpu(p->cpu_qstats); + goto err2; + } + p->tcfa_index = idr_index; + } else { + idr_preload(GFP_KERNEL); + spin_lock_bh(&idrinfo->lock); + err = idr_alloc_ext(idr, NULL, NULL, index, index + 1, + GFP_ATOMIC); + spin_unlock_bh(&idrinfo->lock); + idr_preload_end(); + if (err) + goto err3; + p->tcfa_index = index; + } + p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; @@ -314,52 +331,46 @@ err2: &p->tcfa_rate_est, &p->tcfa_lock, NULL, est); if (err) { - free_percpu(p->cpu_qstats); - goto err2; + goto err3; } } - p->hinfo = hinfo; + p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); *a = p; return 0; } -EXPORT_SYMBOL(tcf_hash_create); +EXPORT_SYMBOL(tcf_idr_create); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a) +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) { - struct tcf_hashinfo *hinfo = tn->hinfo; - unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask); + struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock_bh(&hinfo->lock); - hlist_add_head(&a->tcfa_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index); + spin_unlock_bh(&idrinfo->lock); } -EXPORT_SYMBOL(tcf_hash_insert); +EXPORT_SYMBOL(tcf_idr_insert); -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo) +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo) { - int i; - - for (i = 0; i < hinfo->hmask + 1; i++) { - struct tc_action *p; - struct hlist_node *n; - - hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) { - int ret; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + int ret; + unsigned long id = 1; - ret = __tcf_hash_release(p, false, true); - if (ret == ACT_P_DELETED) - module_put(ops->owner); - else if (ret < 0) - return; - } + idr_for_each_entry_ext(idr, p, id) { + ret = __tcf_idr_release(p, false, true); + if (ret == ACT_P_DELETED) + module_put(ops->owner); + else if (ret < 0) + return; } - kfree(hinfo->htab); + idr_destroy(&idrinfo->action_idr); } -EXPORT_SYMBOL(tcf_hashinfo_destroy); 
+EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); @@ -460,9 +471,10 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res) { - int ret = -1, i; u32 jmp_prgcnt = 0; u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */ + int i; + int ret = TC_ACT_OK; if (skb_skip_tc_classify(skb)) return TC_ACT_OK; @@ -506,13 +518,15 @@ EXPORT_SYMBOL(tcf_action_exec); int tcf_action_destroy(struct list_head *actions, int bind) { + const struct tc_action_ops *ops; struct tc_action *a, *tmp; int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = __tcf_hash_release(a, bind, true); + ops = a->ops; + ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) - module_put(a->ops->owner); + module_put(ops->owner); else if (ret < 0) return ret; } @@ -1068,11 +1082,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, return tcf_add_notify(net, n, &actions, portid); } +static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; +static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { + [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32, + .validation_data = &tcaa_root_flags_allowed }, + [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, +}; + static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct nlattr *tca[TCA_ACT_MAX + 1]; + struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; @@ -1080,7 +1101,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL, + ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; @@ -1121,16 +1142,12 @@ replay: return ret; } -static struct nlattr *find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(struct nlattr **nla) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct nlattr *nla[TCAA_MAX + 1]; struct nlattr *kind; - if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, - NULL, NULL) < 0) - return NULL; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; @@ -1157,8 +1174,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action_ops *a_o; int ret = 0; struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); - struct nlattr *kind = find_dump_kind(cb->nlh); + struct nlattr *tb[TCA_ROOT_MAX + 1]; + struct nlattr *count_attr = NULL; + unsigned long jiffy_since = 0; + struct nlattr *kind = NULL; + struct nla_bitfield32 bf; + u32 msecs_since = 0; + u32 act_count = 0; + + ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, + tcaa_policy, NULL); + if (ret < 0) + return ret; + kind = find_dump_kind(tb); if (kind == NULL) { pr_info("tc_dump_action: action bad kind\n"); return 0; @@ -1168,14 +1197,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (a_o == NULL) return 0; + cb->args[2] = 0; + if (tb[TCA_ROOT_FLAGS]) { + bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]); + cb->args[2] = bf.value; + } + + if (tb[TCA_ROOT_TIME_DELTA]) { + msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]); + } + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if 
(!nlh) goto out_module_put; + + if (msecs_since) + jiffy_since = jiffies - msecs_to_jiffies(msecs_since); + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; + cb->args[3] = jiffy_since; + count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32)); + if (!count_attr) + goto out_module_put; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) @@ -1188,6 +1235,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (ret > 0) { nla_nest_end(skb, nest); ret = skb->len; + act_count = cb->args[1]; + memcpy(nla_data(count_attr), &act_count, sizeof(u32)); + cb->args[1] = 0; } else nlmsg_trim(skb, b); @@ -1203,12 +1253,231 @@ out_module_put: return skb->len; } +struct tcf_action_net { + struct rhashtable egdev_ht; +}; + +static unsigned int tcf_action_net_id; + +struct tcf_action_egdev_cb { + struct list_head list; + tc_setup_cb_t *cb; + void *cb_priv; +}; + +struct tcf_action_egdev { + struct rhash_head ht_node; + const struct net_device *dev; + unsigned int refcnt; + struct list_head cb_list; +}; + +static const struct rhashtable_params tcf_action_egdev_ht_params = { + .key_offset = offsetof(struct tcf_action_egdev, dev), + .head_offset = offsetof(struct tcf_action_egdev, ht_node), + .key_len = sizeof(const struct net_device *), +}; + +static struct tcf_action_egdev * +tcf_action_egdev_lookup(const struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + return rhashtable_lookup_fast(&tan->egdev_ht, &dev, + tcf_action_egdev_ht_params); +} + +static struct tcf_action_egdev * +tcf_action_egdev_get(const struct net_device *dev) +{ + struct tcf_action_egdev *egdev; + struct tcf_action_net *tan; + + egdev = tcf_action_egdev_lookup(dev); + if (egdev) + goto inc_ref; + + egdev = kzalloc(sizeof(*egdev), GFP_KERNEL); + if (!egdev) + return NULL; + INIT_LIST_HEAD(&egdev->cb_list); + egdev->dev = dev; + tan = net_generic(dev_net(dev), tcf_action_net_id); + rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node, + tcf_action_egdev_ht_params); + +inc_ref: + egdev->refcnt++; + return egdev; +} + +static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) +{ + struct tcf_action_net *tan; + + if (--egdev->refcnt) + return; + tan = net_generic(dev_net(egdev->dev), tcf_action_net_id); + rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node, + tcf_action_egdev_ht_params); + kfree(egdev); +} + +static struct tcf_action_egdev_cb * +tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + list_for_each_entry(egdev_cb, &egdev->cb_list, list) + if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv) + return egdev_cb; + return NULL; +} + +static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev, + enum tc_setup_type type, + void *type_data, bool err_stop) +{ + struct tcf_action_egdev_cb *egdev_cb; + int ok_count = 0; + int err; + + list_for_each_entry(egdev_cb, &egdev->cb_list, list) { + err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv); + if (err) { + if (err_stop) + return err; + } else { + ok_count++; + } + } + return ok_count; +} + +static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); + if (WARN_ON(egdev_cb)) + return -EEXIST; + egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL); + if (!egdev_cb) + return -ENOMEM; + 
egdev_cb->cb = cb; + egdev_cb->cb_priv = cb_priv; + list_add(&egdev_cb->list, &egdev->cb_list); + return 0; +} + +static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); + if (WARN_ON(!egdev_cb)) + return; + list_del(&egdev_cb->list); + kfree(egdev_cb); +} + +static int __tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev); + int err; + + if (!egdev) + return -ENOMEM; + err = tcf_action_egdev_cb_add(egdev, cb, cb_priv); + if (err) + goto err_cb_add; + return 0; + +err_cb_add: + tcf_action_egdev_put(egdev); + return err; +} +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + int err; + + rtnl_lock(); + err = __tc_setup_cb_egdev_register(dev, cb, cb_priv); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register); + +static void __tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); + + if (WARN_ON(!egdev)) + return; + tcf_action_egdev_cb_del(egdev, cb, cb_priv); + tcf_action_egdev_put(egdev); +} +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + rtnl_lock(); + __tc_setup_cb_egdev_unregister(dev, cb, cb_priv); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister); + +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); + + if (!egdev) + return 0; + return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call); + +static __net_init int tcf_action_net_init(struct net *net) +{ + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params); +} + +static void __net_exit tcf_action_net_exit(struct net *net) +{ + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + rhashtable_destroy(&tan->egdev_ht); +} + +static struct pernet_operations tcf_action_net_ops = { + .init = tcf_action_net_init, + .exit = tcf_action_net_exit, + .id = &tcf_action_net_id, + .size = sizeof(struct tcf_action_net), +}; + static int __init tc_action_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL); + int err; + + err = register_pernet_subsys(&tcf_action_net_ops); + if (err) + return err; + + rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, - NULL); + 0); return 0; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9afe1337cfd1..5ef8ce8c83d4 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -21,7 +21,6 @@ #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> -#define BPF_TAB_MASK 15 #define ACT_BPF_NAME_LEN 256 struct tcf_bpf_cfg { @@ -50,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - bpf_compute_data_end(skb); + 
bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); } rcu_read_unlock(); @@ -295,9 +294,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_ACT_BPF_PARMS]); - if (!tcf_hash_check(tn, parm->index, act, bind)) { - ret = tcf_hash_create(tn, parm->index, est, act, - &act_bpf_ops, bind, true); + if (!tcf_idr_check(tn, parm->index, act, bind)) { + ret = tcf_idr_create(tn, parm->index, est, act, + &act_bpf_ops, bind, true); if (ret < 0) return ret; @@ -307,7 +306,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (bind) return 0; - tcf_hash_release(*act, bind); + tcf_idr_release(*act, bind); if (!replace) return -EEXIST; } @@ -343,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, rcu_assign_pointer(prog->filter, cfg.filter); if (res == ACT_P_CREATED) { - tcf_hash_insert(tn, *act); + tcf_idr_insert(tn, *act); } else { /* make sure the program being replaced is no longer executing */ synchronize_rcu(); @@ -353,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return res; out: if (res == ACT_P_CREATED) - tcf_hash_cleanup(*act, est); + tcf_idr_cleanup(*act, est); return ret; } @@ -379,7 +378,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_bpf_ops __read_mostly = { @@ -399,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK); + return tc_action_net_init(tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 2155bc6c6a1e..10b7a8855a6c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -28,8 +28,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> -#define CONNMARK_TAB_MASK 3 - static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; @@ -119,9 +117,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_CONNMARK_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_connmark_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_connmark_ops, bind, false); if (ret) return ret; @@ -130,13 +128,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci->net = net; ci->zone = parm->zone; - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); ret = ACT_P_CREATED; } else { ci = to_connmark(*a); if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; /* replacing action and zone */ @@ -189,7 +187,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_connmark_ops = { @@ -208,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return 
tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK); + return tc_action_net_init(tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3317a2f579da..d836f998117b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,8 +37,6 @@ #include <linux/tc_act/tc_csum.h> #include <net/tc_act/tc_csum.h> -#define CSUM_TAB_MASK 15 - static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; @@ -67,16 +65,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_csum_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_csum_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -88,7 +86,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -615,7 +613,7 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_csum_ops = { @@ -634,7 +632,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK); + return tc_action_net_init(tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 99afe8b1f1fb..e29a48ef7fc3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -23,8 +23,6 @@ #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> -#define GACT_TAB_MASK 15 - static unsigned int gact_net_id; static struct tc_action_ops act_gact_ops; @@ -92,16 +90,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_gact_ops, bind, true); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_gact_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -122,7 +120,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -214,7 +212,7 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_gact_ops = { @@ -234,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK); + return tc_action_net_init(tn, &act_gact_ops); } static void __net_exit 
gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c5dec308b8b1..3007cb1310ea 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -34,8 +34,6 @@ #include <linux/etherdevice.h> #include <net/ife.h> -#define IFE_TAB_MASK 15 - static unsigned int ife_net_id; static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; @@ -250,6 +248,22 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) return ret; } +#ifdef CONFIG_MODULES +static const char *ife_meta_id2name(u32 metaid) +{ + switch (metaid) { + case IFE_META_SKBMARK: + return "skbmark"; + case IFE_META_PRIO: + return "skbprio"; + case IFE_META_TCINDEX: + return "tcindex"; + default: + return "unknown"; + } +} +#endif + /* called when adding new meta information * under ife->tcf_lock for existing action */ @@ -265,7 +279,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (exists) spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); - request_module("ifemeta%u", metaid); + request_module("ife-meta-%s", ife_meta_id2name(metaid)); rtnl_lock(); if (exists) spin_lock_bh(&ife->tcf_lock); @@ -394,10 +408,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind) static void tcf_ife_cleanup(struct tc_action *a, int bind) { struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p; spin_lock_bh(&ife->tcf_lock); _tcf_ife_cleanup(a, bind); spin_unlock_bh(&ife->tcf_lock); + + p = rcu_dereference_protected(ife->params, 1); + kfree_rcu(p, rcu); } /* under ife->tcf_lock for existing action */ @@ -434,9 +452,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; + struct tcf_ife_params *p, *p_old; struct tcf_ife_info *ife; + u16 ife_type = ETH_P_IFE; struct tc_ife *parm; - u16 ife_type = 0; u8 *daddr = NULL; u8 *saddr = NULL; bool exists = false; @@ -452,63 +471,70 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_IFE_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); - if (exists && bind) - return 0; + /* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because + * they cannot run as the same time. Check on all other values which + * are not supported right now. + */ + if (parm->flags & ~IFE_ENCODE) + return -EINVAL; - if (parm->flags & IFE_ENCODE) { - /* Until we get issued the ethertype, we cant have - * a default.. 
- **/ - if (!tb[TCA_IFE_TYPE]) { - if (exists) - tcf_hash_release(*a, bind); - pr_info("You MUST pass etherype for encoding\n"); - return -EINVAL; - } + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + exists = tcf_idr_check(tn, parm->index, a, bind); + if (exists && bind) { + kfree(p); + return 0; } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, &act_ife_ops, - bind, false); - if (ret) + ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops, + bind, true); + if (ret) { + kfree(p); return ret; + } ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); - if (!ovr) + tcf_idr_release(*a, bind); + if (!ovr) { + kfree(p); return -EEXIST; + } } ife = to_ife(*a); - ife->flags = parm->flags; + p->flags = parm->flags; if (parm->flags & IFE_ENCODE) { - ife_type = nla_get_u16(tb[TCA_IFE_TYPE]); + if (tb[TCA_IFE_TYPE]) + ife_type = nla_get_u16(tb[TCA_IFE_TYPE]); if (tb[TCA_IFE_DMAC]) daddr = nla_data(tb[TCA_IFE_DMAC]); if (tb[TCA_IFE_SMAC]) saddr = nla_data(tb[TCA_IFE_SMAC]); } - if (exists) - spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { if (daddr) - ether_addr_copy(ife->eth_dst, daddr); + ether_addr_copy(p->eth_dst, daddr); else - eth_zero_addr(ife->eth_dst); + eth_zero_addr(p->eth_dst); if (saddr) - ether_addr_copy(ife->eth_src, saddr); + ether_addr_copy(p->eth_src, saddr); else - eth_zero_addr(ife->eth_src); + eth_zero_addr(p->eth_src); - ife->eth_type = ife_type; + p->eth_type = ife_type; } + if (exists) + spin_lock_bh(&ife->tcf_lock); + if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&ife->metalist); @@ -518,12 +544,13 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (err) { metadata_parse_err: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (ret == ACT_P_CREATED) _tcf_ife_cleanup(*a, bind); if (exists) spin_unlock_bh(&ife->tcf_lock); + kfree(p); return err; } @@ -544,6 +571,7 @@ metadata_parse_err: if (exists) spin_unlock_bh(&ife->tcf_lock); + kfree(p); return err; } } @@ -551,8 +579,13 @@ metadata_parse_err: if (exists) spin_unlock_bh(&ife->tcf_lock); + p_old = rtnl_dereference(ife->params); + rcu_assign_pointer(ife->params, p); + if (p_old) + kfree_rcu(p_old, rcu); + if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -562,12 +595,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p = rtnl_dereference(ife->params); struct tc_ife opt = { .index = ife->tcf_index, .refcnt = ife->tcf_refcnt - ref, .bindcnt = ife->tcf_bindcnt - bind, .action = ife->tcf_action, - .flags = ife->flags, + .flags = p->flags, }; struct tcf_t t; @@ -578,17 +612,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD)) goto nla_put_failure; - if (!is_zero_ether_addr(ife->eth_dst)) { - if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst)) + if (!is_zero_ether_addr(p->eth_dst)) { + if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst)) goto nla_put_failure; } - if (!is_zero_ether_addr(ife->eth_src)) { - if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src)) + if (!is_zero_ether_addr(p->eth_src)) { + if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src)) goto nla_put_failure; } - if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type)) + if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type)) goto nla_put_failure; if (dump_metalist(skb, ife)) 
{ @@ -630,19 +664,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u8 *tlv_data; u16 metalen; - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); + bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); - spin_unlock(&ife->tcf_lock); if (skb_at_tc_ingress(skb)) skb_push(skb, skb->dev->hard_header_len); tlv_data = ife_decode(skb, &metalen); if (unlikely(!tlv_data)) { - spin_lock(&ife->tcf_lock); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -660,14 +690,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, */ pr_info_ratelimited("Unknown metaid %d dlen %d\n", mtype, dlen); - ife->tcf_qstats.overlimits++; + qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats)); } } if (WARN_ON(tlv_data != ifehdr_end)) { - spin_lock(&ife->tcf_lock); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -696,7 +724,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife) } static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) + struct tcf_result *res, struct tcf_ife_params *p) { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; @@ -719,23 +747,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, exceed_mtu = true; } - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); + bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (!metalen) { /* no metadata to send */ /* abuse overlimits to count when we allow packet * with no metadata */ - ife->tcf_qstats.overlimits++; - spin_unlock(&ife->tcf_lock); + qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats)); return action; } /* could be stupid policy setup or mtu config * so lets be conservative.. */ if ((action == TC_ACT_SHOT) || exceed_mtu) { - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -744,6 +769,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, ife_meta = ife_encode(skb, metalen); + spin_lock(&ife->tcf_lock); + /* XXX: we dont have a clever way of telling encode to * not repeat some of the computations that are done by * ops->presence_check... 
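The act_ife conversion in this hunk does two things at once: per-packet stats move from the spinlocked tcf_bstats/tcf_qstats to per-CPU counters (bstats_cpu_update, qstats_drop_inc, qstats_overlimit_inc), and the encode-side parameters move behind an RCU pointer so the fast path can read them locklessly. The writer side, as done in tcf_ife_init() above, is the standard replace-then-free-after-grace-period idiom; a minimal sketch, assuming the tcf_ife_params layout shown in this diff:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <net/tc_act/tc_ife.h>

/* Writer side of the params switch, as in tcf_ife_init(); runs under
 * RTNL, so rtnl_dereference() is safe here. */
static void ife_params_replace(struct tcf_ife_info *ife,
			       struct tcf_ife_params *p)
{
	struct tcf_ife_params *p_old;

	p_old = rtnl_dereference(ife->params);
	rcu_assign_pointer(ife->params, p);	/* publish the new block */
	if (p_old)
		kfree_rcu(p_old, rcu);	/* free after a grace period */
}

Readers pair this with rcu_read_lock()/rcu_dereference(ife->params), as tcf_ife_act() does in the hunk below.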
@@ -755,25 +782,24 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, } if (err < 0) { /* too corrupt to keep around if overwritten */ - ife->tcf_qstats.drops++; spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } skboff += err; } + spin_unlock(&ife->tcf_lock); oethh = (struct ethhdr *)skb->data; - if (!is_zero_ether_addr(ife->eth_src)) - ether_addr_copy(oethh->h_source, ife->eth_src); - if (!is_zero_ether_addr(ife->eth_dst)) - ether_addr_copy(oethh->h_dest, ife->eth_dst); - oethh->h_proto = htons(ife->eth_type); + if (!is_zero_ether_addr(p->eth_src)) + ether_addr_copy(oethh->h_source, p->eth_src); + if (!is_zero_ether_addr(p->eth_dst)) + ether_addr_copy(oethh->h_dest, p->eth_dst); + oethh->h_proto = htons(p->eth_type); if (skb_at_tc_ingress(skb)) skb_pull(skb, skb->dev->hard_header_len); - spin_unlock(&ife->tcf_lock); - return action; } @@ -781,21 +807,19 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p; + int ret; + + rcu_read_lock(); + p = rcu_dereference(ife->params); + if (p->flags & IFE_ENCODE) { + ret = tcf_ife_encode(skb, a, res, p); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); - if (ife->flags & IFE_ENCODE) - return tcf_ife_encode(skb, a, res); - - if (!(ife->flags & IFE_ENCODE)) - return tcf_ife_decode(skb, a, res); - - pr_info_ratelimited("unknown failure(policy neither de/encode\n"); - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); - tcf_lastuse_update(&ife->tcf_tm); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); - - return TC_ACT_SHOT; + return tcf_ife_decode(skb, a, res); } static int tcf_ife_walker(struct net *net, struct sk_buff *skb, @@ -811,7 +835,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_ife_ops = { @@ -831,7 +855,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK); + return tc_action_net_init(tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 36f0ced9e60c..d9e399a7e3d5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -28,19 +28,18 @@ #include <linux/netfilter_ipv4/ip_tables.h> -#define IPT_TAB_MASK 15 - static unsigned int ipt_net_id; static struct tc_action_ops act_ipt_ops; static unsigned int xt_net_id; static struct tc_action_ops act_xt_ops; -static int ipt_init_target(struct xt_entry_target *t, char *table, - unsigned int hook) +static int ipt_init_target(struct net *net, struct xt_entry_target *t, + char *table, unsigned int hook) { struct xt_tgchk_param par; struct xt_target *target; + struct ipt_entry e = {}; int ret = 0; target = xt_request_find_target(AF_INET, t->u.user.name, @@ -49,8 +48,10 @@ static int ipt_init_target(struct xt_entry_target *t, char *table, return PTR_ERR(target); t->u.kernel.target = target; + memset(&par, 0, sizeof(par)); + par.net = net; par.table = table; - par.entryinfo = NULL; + par.entryinfo = &e; par.target = target; par.targinfo = t->data; par.hook_mask = hook; @@ -91,10 +92,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { [TCA_IPT_TARG] = { .len = 
sizeof(struct xt_entry_target) }, }; -static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, +static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int ovr, int bind) { + struct tc_action_net *tn = net_generic(net, id); struct nlattr *tb[TCA_IPT_MAX + 1]; struct tcf_ipt *ipt; struct xt_entry_target *td, *t; @@ -114,33 +116,33 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (tb[TCA_IPT_INDEX] != NULL) index = nla_get_u32(tb[TCA_IPT_INDEX]); - exists = tcf_hash_check(tn, index, a, bind); + exists = tcf_idr_check(tn, index, a, bind); if (exists && bind) return 0; if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (!exists) { - ret = tcf_hash_create(tn, index, est, a, ops, bind, - false); + ret = tcf_idr_create(tn, index, est, a, ops, bind, + false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; @@ -159,7 +161,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (unlikely(!t)) goto err2; - err = ipt_init_target(t, tname, hook); + err = ipt_init_target(net, t, tname, hook); if (err < 0) goto err3; @@ -176,7 +178,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; err3: @@ -185,7 +187,7 @@ err2: kfree(tname); err1: if (ret == ACT_P_CREATED) - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return err; } @@ -193,18 +195,16 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return __tcf_ipt_init(tn, nla, est, a, &act_ipt_ops, ovr, bind); + return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, + bind); } static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return __tcf_ipt_init(tn, nla, est, a, &act_xt_ops, ovr, bind); + return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, + bind); } static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, @@ -314,7 +314,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_ipt_ops = { @@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK); + return tc_action_net_init(tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -364,7 +364,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tcf_hash_search(tn, a, index); + return 
tcf_idr_search(tn, a, index); } static struct tc_action_ops act_xt_ops = { @@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK); + return tc_action_net_init(tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c index 82892170ce4f..1e3f10e5da99 100644 --- a/net/sched/act_meta_mark.c +++ b/net/sched/act_meta_mark.c @@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2015)"); MODULE_DESCRIPTION("Inter-FE skb mark metadata module"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_SKBMARK); +MODULE_ALIAS_IFE_META("skbmark"); diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c index 26bf4d86030b..4033f9fc4d4a 100644 --- a/net/sched/act_meta_skbprio.c +++ b/net/sched/act_meta_skbprio.c @@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2015)"); MODULE_DESCRIPTION("Inter-FE skb prio metadata action"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_PRIO); +MODULE_ALIAS_IFE_META("skbprio"); diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c index 3b35774ce890..2ea1f26c9e96 100644 --- a/net/sched/act_meta_skbtcindex.c +++ b/net/sched/act_meta_skbtcindex.c @@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2016)"); MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); +MODULE_ALIAS_IFE_META("tcindex"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1b5549ababd4..8b3e59388480 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -28,7 +28,6 @@ #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> -#define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_MIRRED_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -106,14 +105,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, break; default: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (parm->ifindex) { dev = __dev_get_by_index(net, parm->ifindex); if (dev == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENODEV; } mac_header_xmit = dev_is_mac_header_xmit(dev); @@ -124,13 +123,13 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (!exists) { if (dev == NULL) return -EINVAL; - ret = tcf_hash_create(tn, parm->index, est, a, - &act_mirred_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_mirred_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -141,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, m->tcfm_eaction = parm->eaction; if (dev != NULL) { m->tcfm_ifindex = parm->ifindex; + m->net = net; if (ret != ACT_P_CREATED) dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); @@ -152,7 +152,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock_bh(&mirred_list_lock); 
list_add(&m->tcfm_list, &mirred_list); spin_unlock_bh(&mirred_list_lock); - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); } return ret; @@ -283,7 +283,7 @@ static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static int mirred_device_event(struct notifier_block *unused, @@ -314,15 +314,11 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; -static int tcf_mirred_device(const struct tc_action *a, struct net *net, - struct net_device **mirred_dev) +static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) { - int ifindex = tcf_mirred_ifindex(a); + struct tcf_mirred *m = to_mirred(a); - *mirred_dev = __dev_get_by_index(net, ifindex); - if (!*mirred_dev) - return -EINVAL; - return 0; + return __dev_get_by_index(m->net, m->tcfm_ifindex); } static struct tc_action_ops act_mirred_ops = { @@ -337,14 +333,14 @@ static struct tc_action_ops act_mirred_ops = { .walk = tcf_mirred_walker, .lookup = tcf_mirred_search, .size = sizeof(struct tcf_mirred), - .get_dev = tcf_mirred_device, + .get_dev = tcf_mirred_get_dev, }; static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK); + return tc_action_net_init(tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 9016ab8a0649..c365d01b99c8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -29,8 +29,6 @@ #include <net/udp.h> -#define NAT_TAB_MASK 15 - static unsigned int nat_net_id; static struct tc_action_ops act_nat_ops; @@ -58,16 +56,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, return -EINVAL; parm = nla_data(tb[TCA_NAT_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_nat_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_nat_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -83,7 +81,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -290,7 +288,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_nat_ops = { @@ -309,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK); + return tc_action_net_init(tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7dc5892671c8..491fe5deb09e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -24,8 +24,6 @@ #include <net/tc_act/tc_pedit.h> #include <uapi/linux/tc_act/tc_pedit.h> -#define PEDIT_TAB_MASK 15 - static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; @@ 
-168,17 +166,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (IS_ERR(keys_ex)) return PTR_ERR(keys_ex); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!tcf_idr_check(tn, parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; - ret = tcf_hash_create(tn, parm->index, est, a, - &act_pedit_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_pedit_ops, bind, false); if (ret) return ret; p = to_pedit(*a); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); kfree(keys_ex); return -ENOMEM; } @@ -186,7 +184,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } else { if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; p = to_pedit(*a); @@ -214,7 +212,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -432,7 +430,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_pedit_ops = { @@ -452,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK); + return tc_action_net_init(tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b062bc80c7cb..3bb2ebf9e9ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -40,8 +40,6 @@ struct tcf_police { #define to_police(pc) ((struct tcf_police *)pc) -#define POL_TAB_MASK 15 - /* old policer structure from before tc actions */ struct tc_police_compat { u32 index; @@ -101,18 +99,18 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_POLICE_TBF]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, NULL, a, - &act_police_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, NULL, a, + &act_police_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -188,7 +186,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return ret; police->tcfp_t_c = ktime_get_ns(); - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; @@ -196,7 +194,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return err; } @@ -310,7 +308,7 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } MODULE_AUTHOR("Alexey Kuznetsov"); @@ -333,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK); + return tc_action_net_init(tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git 
a/net/sched/act_sample.c b/net/sched/act_sample.c index 59d6645a4007..8b5abcd2f32f 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -25,7 +25,6 @@ #include <linux/if_arp.h> -#define SAMPLE_TAB_MASK 7 static unsigned int sample_net_id; static struct tc_action_ops act_sample_ops; @@ -59,18 +58,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SAMPLE_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_sample_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_sample_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -82,7 +81,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, psample_group = psample_group_get(net, s->psample_group_num); if (!psample_group) { if (ret == ACT_P_CREATED) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } RCU_INIT_POINTER(s->psample_group, psample_group); @@ -93,7 +92,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -221,7 +220,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_sample_ops = { @@ -241,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK); + return tc_action_net_init(tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) @@ -265,12 +264,13 @@ static int __init sample_init_module(void) static void __exit sample_cleanup_module(void) { + rcu_barrier(); tcf_unregister_action(&act_sample_ops, &sample_net_ops); } module_init(sample_init_module); module_exit(sample_cleanup_module); -MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); +MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>"); MODULE_DESCRIPTION("Packet sampling action"); MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 43605e7ce051..e7b57e5071a3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -24,8 +24,6 @@ #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> -#define SIMP_TAB_MASK 7 - static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; @@ -102,28 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (tb[TCA_DEF_DATA] == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } defdata = nla_data(tb[TCA_DEF_DATA]); if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_simp_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_simp_ops, bind, false); if (ret) return ret; d = to_defact(*a); ret = alloc_defdata(d, defdata); if (ret < 0) { - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return ret; } d->tcf_action = parm->action; @@ 
-131,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } else { d = to_defact(*a); - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; @@ -139,7 +137,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -183,7 +181,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_simp_ops = { @@ -203,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK); + return tc_action_net_init(tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 6b3e65d7de0c..59949d61f20d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -27,8 +27,6 @@ #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> -#define SKBEDIT_TAB_MASK 15 - static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; @@ -118,18 +116,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!flags) { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_skbedit_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_skbedit_ops, bind, false); if (ret) return ret; @@ -137,7 +135,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(*a); - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -163,7 +161,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&d->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -221,7 +219,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_skbedit_ops = { @@ -240,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK); + return tc_action_net_init(tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index a73c4bbcada2..b642ad3d39dd 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -20,8 +20,6 @@ #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> -#define SKBMOD_TAB_MASK 15 - static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; @@ -129,7 +127,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ 
-137,14 +135,14 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, return -EINVAL; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_skbmod_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -155,7 +153,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { if (ovr) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } @@ -182,7 +180,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, kfree_rcu(p_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -245,7 +243,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_skbmod_ops = { @@ -265,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); + return tc_action_net_init(tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index fd7e75679c69..30c96274c638 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -20,8 +20,6 @@ #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> -#define TUNNEL_KEY_TAB_MASK 15 - static unsigned int tunnel_key_net_id; static struct tc_action_ops act_tunnel_key_ops; @@ -100,7 +98,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -159,14 +157,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_tunnel_key_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -177,7 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { if (ret == ACT_P_CREATED) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } @@ -193,13 +191,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, kfree_rcu(params_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; err_out: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return ret; } @@ -304,7 +302,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_tunnel_key_ops = { @@ -324,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); 
- return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); + return tc_action_net_init(tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 13ba3a89f675..97f717a13ad5 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -19,8 +19,6 @@ #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> -#define VLAN_TAB_MASK 15 - static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; @@ -28,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p; int action; int err; u16 tci; - spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); - bstats_update(&v->tcf_bstats, skb); - action = v->tcf_action; + bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb); /* Ensure 'data' points at mac_header prior calling vlan manipulating * functions. @@ -43,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, if (skb_at_tc_ingress(skb)) skb_push_rcsum(skb, skb->mac_len); - switch (v->tcfv_action) { + rcu_read_lock(); + + action = READ_ONCE(v->tcf_action); + + p = rcu_dereference(v->vlan_p); + + switch (p->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); if (err) goto drop; break; case TCA_VLAN_ACT_PUSH: - err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | - (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); + err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid | + (p->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; @@ -70,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, goto drop; } /* replace the vid */ - tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid; /* replace prio bits, if tcfv_push_prio specified */ - if (v->tcfv_push_prio) { + if (p->tcfv_push_prio) { tci &= ~VLAN_PRIO_MASK; - tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT; } /* put updated tci as hwaccel tag */ - __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci); break; default: BUG(); @@ -87,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, drop: action = TC_ACT_SHOT; - v->tcf_qstats.drops++; + qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats)); + unlock: + rcu_read_unlock(); if (skb_at_tc_ingress(skb)) skb_pull_rcsum(skb, skb->mac_len); - spin_unlock(&v->tcf_lock); return action; } @@ -109,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tcf_vlan_params *p, *p_old; struct tc_vlan *parm; struct tcf_vlan *v; int action; @@ -128,7 +133,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -139,13 +144,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); if (push_vid >= VLAN_VID_MASK) { if (exists) - tcf_hash_release(*a, bind); + 
tcf_idr_release(*a, bind); return -ERANGE; } @@ -167,66 +172,87 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, break; default: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } action = parm->v_action; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_vlan_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_vlan_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } v = to_vlan(*a); - spin_lock_bh(&v->tcf_lock); - - v->tcfv_action = action; - v->tcfv_push_vid = push_vid; - v->tcfv_push_prio = push_prio; - v->tcfv_push_proto = push_proto; + ASSERT_RTNL(); + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + if (ovr) + tcf_idr_release(*a, bind); + return -ENOMEM; + } v->tcf_action = parm->action; - spin_unlock_bh(&v->tcf_lock); + p_old = rtnl_dereference(v->vlan_p); + + p->tcfv_action = action; + p->tcfv_push_vid = push_vid; + p->tcfv_push_prio = push_prio; + p->tcfv_push_proto = push_proto; + + rcu_assign_pointer(v->vlan_p, p); + + if (p_old) + kfree_rcu(p_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } +static void tcf_vlan_cleanup(struct tc_action *a, int bind) +{ + struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p; + + p = rcu_dereference_protected(v->vlan_p, 1); + kfree_rcu(p, rcu); +} + static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p); struct tc_vlan opt = { .index = v->tcf_index, .refcnt = v->tcf_refcnt - ref, .bindcnt = v->tcf_bindcnt - bind, .action = v->tcf_action, - .v_action = v->tcfv_action, + .v_action = p->tcfv_action, }; struct tcf_t t; if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || - v->tcfv_action == TCA_VLAN_ACT_MODIFY) && - (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || + if ((p->tcfv_action == TCA_VLAN_ACT_PUSH || + p->tcfv_action == TCA_VLAN_ACT_MODIFY) && + (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, - v->tcfv_push_proto) || + p->tcfv_push_proto) || (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, - v->tcfv_push_prio)))) + p->tcfv_push_prio)))) goto nla_put_failure; tcf_tm_dump(&t, &v->tcf_tm); @@ -252,7 +278,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_vlan_ops = { @@ -262,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = { .act = tcf_vlan, .dump = tcf_vlan_dump, .init = tcf_vlan_init, + .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, .lookup = tcf_vlan_search, .size = sizeof(struct tcf_vlan), @@ -271,7 +298,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK); + return tc_action_net_init(tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 39da0c5801c9..7d97f612c9b9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -77,6 +77,8 @@ out: } 
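
With the two act_vlan hunks above, the per-packet path stops taking tcf_lock: tcf_vlan() updates per-CPU counters and reads its configuration through an RCU-protected tcf_vlan_params, while tcf_vlan_init() builds a fresh params struct, publishes it with rcu_assign_pointer(), and disposes of the old one with kfree_rcu(). A userspace sketch of the same read-copy-update shape, assuming liburcu (include <urcu.h>, link with -lurcu); error handling trimmed:

    #include <urcu.h>    /* liburcu: rcu_read_lock(), synchronize_rcu(), ... */
    #include <stdio.h>
    #include <stdlib.h>

    struct vlan_params { int action; unsigned short vid; };

    static struct vlan_params *vlan_p;    /* RCU-protected, like v->vlan_p */

    static int fast_path(void)            /* like tcf_vlan(): lockless read */
    {
        struct vlan_params *p;
        int vid;

        rcu_read_lock();
        p = rcu_dereference(vlan_p);
        vid = p ? p->vid : -1;
        rcu_read_unlock();
        return vid;
    }

    static void reconfigure(unsigned short vid)    /* like tcf_vlan_init() */
    {
        struct vlan_params *old, *new = calloc(1, sizeof(*new));

        new->vid = vid;
        old = vlan_p;                     /* single writer under "RTNL" */
        rcu_assign_pointer(vlan_p, new);
        synchronize_rcu();                /* kernel defers via kfree_rcu() */
        free(old);
    }

    int main(void)
    {
        rcu_register_thread();
        reconfigure(100);
        printf("vid=%d\n", fast_path());
        rcu_unregister_thread();
        return 0;
    }
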
EXPORT_SYMBOL(register_tcf_proto_ops); +static struct workqueue_struct *tc_filter_wq; + int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; @@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) * tcf_proto_ops's destroy() handler. */ rcu_barrier(); + flush_workqueue(tc_filter_wq); write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { @@ -100,20 +103,11 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) } EXPORT_SYMBOL(unregister_tcf_proto_ops); -static int tfilter_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event, bool unicast); - -static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, - struct tcf_chain *chain, int event) +bool tcf_queue_work(struct work_struct *work) { - struct tcf_proto *tp; - - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) - tfilter_notify(net, oskb, n, tp, 0, event, false); + return queue_work(tc_filter_wq, work); } +EXPORT_SYMBOL(tcf_queue_work); /* Select new prio value from the range, managed by kernel. */ @@ -201,25 +195,38 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, return chain; } +static void tcf_chain_head_change(struct tcf_chain *chain, + struct tcf_proto *tp_head) +{ + if (chain->chain_head_change) + chain->chain_head_change(tp_head, + chain->chain_head_change_priv); +} + static void tcf_chain_flush(struct tcf_chain *chain) { - struct tcf_proto *tp; + struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); - if (*chain->p_filter_chain) - RCU_INIT_POINTER(*chain->p_filter_chain, NULL); - while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { + tcf_chain_head_change(chain, NULL); + while (tp) { RCU_INIT_POINTER(chain->filter_chain, tp->next); tcf_proto_destroy(tp); + tp = rtnl_dereference(chain->filter_chain); + tcf_chain_put(chain); } } static void tcf_chain_destroy(struct tcf_chain *chain) { list_del(&chain->list); - tcf_chain_flush(chain); kfree(chain); } +static void tcf_chain_hold(struct tcf_chain *chain) +{ + ++chain->refcnt; +} + struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create) { @@ -227,36 +234,51 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, list_for_each_entry(chain, &block->chain_list, list) { if (chain->index == chain_index) { - chain->refcnt++; + tcf_chain_hold(chain); return chain; } } - if (create) - return tcf_chain_create(block, chain_index); - else - return NULL; + + return create ? tcf_chain_create(block, chain_index) : NULL; } EXPORT_SYMBOL(tcf_chain_get); void tcf_chain_put(struct tcf_chain *chain) { - /* Destroy unused chain, with exception of chain 0, which is the - * default one and has to be always present. 
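
A detail that is easy to miss in these cls_api.c hunks: tcf_chain_tp_insert() now takes a chain reference per filter (tcf_chain_hold()), and tcf_chain_flush() drops one per filter removed, which is what lets tcf_chain_put(), just below, shed the old "chain 0 is special" test and simply destroy at refcount zero. The lifetime rule in miniature:

    #include <stdio.h>
    #include <stdlib.h>

    struct chain { unsigned int refcnt; };

    static void chain_hold(struct chain *c)    /* like tcf_chain_hold() */
    {
        ++c->refcnt;
    }

    static void chain_put(struct chain *c)     /* like tcf_chain_put() */
    {
        if (--c->refcnt == 0) {
            printf("last reference: destroy\n");
            free(c);
        }
    }

    int main(void)
    {
        struct chain *c = calloc(1, sizeof(*c));

        c->refcnt = 1;    /* the block's reference from tcf_chain_create() */
        chain_hold(c);    /* a filter is inserted on the chain */
        chain_put(c);     /* the filter is flushed */
        chain_put(c);     /* the block releases the chain: freed here */
        return 0;
    }
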
- */ - if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0) + if (--chain->refcnt == 0) tcf_chain_destroy(chain); } EXPORT_SYMBOL(tcf_chain_put); -static void -tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, - struct tcf_proto __rcu **p_filter_chain) +static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei, + enum tc_block_command command) { - chain->p_filter_chain = p_filter_chain; + struct net_device *dev = q->dev_queue->dev; + struct tc_block_offload bo = {}; + + if (!dev->netdev_ops->ndo_setup_tc) + return; + bo.command = command; + bo.binder_type = ei->binder_type; + bo.block = block; + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); } -int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain) +static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND); +} + +static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND); +} + +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, + struct tcf_block_ext_info *ei) { struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); struct tcf_chain *chain; @@ -265,13 +287,20 @@ int tcf_block_get(struct tcf_block **p_block, if (!block) return -ENOMEM; INIT_LIST_HEAD(&block->chain_list); + INIT_LIST_HEAD(&block->cb_list); + /* Create chain 0 by default, it has to be always present. */ chain = tcf_chain_create(block, 0); if (!chain) { err = -ENOMEM; goto err_chain_create; } - tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); + WARN_ON(!ei->chain_head_change); + chain->chain_head_change = ei->chain_head_change; + chain->chain_head_change_priv = ei->chain_head_change_priv; + block->net = qdisc_net(q); + block->q = q; + tcf_block_offload_bind(block, q, ei); *p_block = block; return 0; @@ -279,21 +308,179 @@ err_chain_create: kfree(block); return err; } +EXPORT_SYMBOL(tcf_block_get_ext); + +static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) +{ + struct tcf_proto __rcu **p_filter_chain = priv; + + rcu_assign_pointer(*p_filter_chain, tp_head); +} + +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) +{ + struct tcf_block_ext_info ei = { + .chain_head_change = tcf_chain_head_change_dflt, + .chain_head_change_priv = p_filter_chain, + }; + + WARN_ON(!p_filter_chain); + return tcf_block_get_ext(p_block, q, &ei); +} EXPORT_SYMBOL(tcf_block_get); -void tcf_block_put(struct tcf_block *block) +static void tcf_block_put_final(struct work_struct *work) { + struct tcf_block *block = container_of(work, struct tcf_block, work); struct tcf_chain *chain, *tmp; - if (!block) - return; - + rtnl_lock(); + /* Only chain 0 should be still here. */ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) - tcf_chain_destroy(chain); + tcf_chain_put(chain); + rtnl_unlock(); kfree(block); } + +/* XXX: Standalone actions are not allowed to jump to any chain, and bound + * actions should be all removed after flushing. However, filters are now + * destroyed in tc filter workqueue with RTNL lock, they can not race here. 
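
tcf_block_get_ext() above replaces the raw p_filter_chain double pointer with a chain_head_change callback plus private data: whenever the head filter of a chain changes, the owner is told through the callback instead of having its pointer patched behind its back, and tcf_chain_head_change_dflt() recovers the old behaviour for plain tcf_block_get() users. A stripped-down sketch of that plumbing (types are illustrative):

    #include <stdio.h>

    struct tcf_proto;    /* opaque here */

    struct chain {
        void (*head_change)(struct tcf_proto *tp_head, void *priv);
        void *head_change_priv;
        struct tcf_proto *filter_chain;
    };

    /* Default behaviour: mirror the new head into the caller's pointer. */
    static void head_change_dflt(struct tcf_proto *tp_head, void *priv)
    {
        *(struct tcf_proto **)priv = tp_head;
    }

    static void chain_set_head(struct chain *c, struct tcf_proto *tp)
    {
        c->filter_chain = tp;
        if (c->head_change)    /* like tcf_chain_head_change() */
            c->head_change(tp, c->head_change_priv);
    }

    int main(void)
    {
        struct tcf_proto *qdisc_view = NULL;   /* what the qdisc dereferences */
        struct chain c = {
            .head_change = head_change_dflt,
            .head_change_priv = &qdisc_view,
        };

        chain_set_head(&c, (struct tcf_proto *)0x1);
        printf("qdisc sees head %p\n", (void *)qdisc_view);
        return 0;
    }
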
+ */ +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + struct tcf_chain *chain, *tmp; + + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_flush(chain); + + tcf_block_offload_unbind(block, q, ei); + + INIT_WORK(&block->work, tcf_block_put_final); + /* Wait for existing RCU callbacks to cool down, make sure their works + * have been queued before this. We can not flush pending works here + * because we are holding the RTNL lock. + */ + rcu_barrier(); + tcf_queue_work(&block->work); +} +EXPORT_SYMBOL(tcf_block_put_ext); + +void tcf_block_put(struct tcf_block *block) +{ + struct tcf_block_ext_info ei = {0, }; + + if (!block) + return; + tcf_block_put_ext(block, block->q, &ei); +} + EXPORT_SYMBOL(tcf_block_put); +struct tcf_block_cb { + struct list_head list; + tc_setup_cb_t *cb; + void *cb_ident; + void *cb_priv; + unsigned int refcnt; +}; + +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb) +{ + return block_cb->cb_priv; +} +EXPORT_SYMBOL(tcf_block_cb_priv); + +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ struct tcf_block_cb *block_cb; + + list_for_each_entry(block_cb, &block->cb_list, list) + if (block_cb->cb == cb && block_cb->cb_ident == cb_ident) + return block_cb; + return NULL; +} +EXPORT_SYMBOL(tcf_block_cb_lookup); + +void tcf_block_cb_incref(struct tcf_block_cb *block_cb) +{ + block_cb->refcnt++; +} +EXPORT_SYMBOL(tcf_block_cb_incref); + +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb) +{ + return --block_cb->refcnt; +} +EXPORT_SYMBOL(tcf_block_cb_decref); + +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + struct tcf_block_cb *block_cb; + + block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL); + if (!block_cb) + return NULL; + block_cb->cb = cb; + block_cb->cb_ident = cb_ident; + block_cb->cb_priv = cb_priv; + list_add(&block_cb->list, &block->cb_list); + return block_cb; +} +EXPORT_SYMBOL(__tcf_block_cb_register); + +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + struct tcf_block_cb *block_cb; + + block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv); + return block_cb ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(tcf_block_cb_register); + +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb) +{ + list_del(&block_cb->list); + kfree(block_cb); +} +EXPORT_SYMBOL(__tcf_block_cb_unregister); + +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ + struct tcf_block_cb *block_cb; + + block_cb = tcf_block_cb_lookup(block, cb, cb_ident); + if (!block_cb) + return; + __tcf_block_cb_unregister(block_cb); +} +EXPORT_SYMBOL(tcf_block_cb_unregister); + +static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) +{ + struct tcf_block_cb *block_cb; + int ok_count = 0; + int err; + + list_for_each_entry(block_cb, &block->cb_list, list) { + err = block_cb->cb(type, type_data, block_cb->cb_priv); + if (err) { + if (err_stop) + return err; + } else { + ok_count++; + } + } + return ok_count; +} + /* Main classifier routine: scans classifier chain attached * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. 
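
The tcf_block_cb registry above is the driver-facing half of shareable blocks: a driver registers one offload callback per block instead of one per qdisc, and tcf_block_cb_call() reports how many callbacks accepted a command, or the first error when err_stop is set (the skip_sw case, where software fallback is not allowed). The aggregation rule, extracted into a runnable sketch:

    #include <stdio.h>

    typedef int (*setup_cb_t)(int type, void *data, void *priv);

    struct block_cb { setup_cb_t cb; void *priv; struct block_cb *next; };

    /* Mirrors tcf_block_cb_call(): count successes, optionally stop on error. */
    static int block_cb_call(struct block_cb *list, int type, void *data,
                             int err_stop)
    {
        int ok_count = 0, err;

        for (; list; list = list->next) {
            err = list->cb(type, data, list->priv);
            if (err) {
                if (err_stop)
                    return err;    /* skip_sw offload must not fail silently */
            } else {
                ok_count++;
            }
        }
        return ok_count;
    }

    static int accept_cb(int type, void *data, void *priv) { return 0; }
    static int reject_cb(int type, void *data, void *priv) { return -95; /* -EOPNOTSUPP */ }

    int main(void)
    {
        struct block_cb b = { reject_cb, NULL, NULL };
        struct block_cb a = { accept_cb, NULL, &b };

        printf("best effort: %d\n", block_cb_call(&a, 0, NULL, 0));    /* 1 */
        printf("err_stop:    %d\n", block_cb_call(&a, 0, NULL, 1));    /* -95 */
        return 0;
    }
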
@@ -362,11 +549,11 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { - if (chain->p_filter_chain && - *chain_info->pprev == chain->filter_chain) - rcu_assign_pointer(*chain->p_filter_chain, tp); + if (*chain_info->pprev == chain->filter_chain) + tcf_chain_head_change(chain, tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); + tcf_chain_hold(chain); } static void tcf_chain_tp_remove(struct tcf_chain *chain, @@ -375,9 +562,10 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, { struct tcf_proto *next = rtnl_dereference(chain_info->next); - if (chain->p_filter_chain && tp == chain->filter_chain) - RCU_INIT_POINTER(*chain->p_filter_chain, next); + if (tp == chain->filter_chain) + tcf_chain_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); + tcf_chain_put(chain); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, @@ -407,6 +595,112 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, return tp; } +static int tcf_fill_node(struct net *net, struct sk_buff *skb, + struct tcf_proto *tp, struct Qdisc *q, u32 parent, + void *fh, u32 portid, u32 seq, u16 flags, int event) +{ + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb_tail_pointer(skb); + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; + tcm->tcm_parent = parent; + tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); + if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) + goto nla_put_failure; + if (!fh) { + tcm->tcm_handle = 0; + } else { + if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) + goto nla_put_failure; + } + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +out_nlmsg_trim: +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + struct Qdisc *q, u32 parent, + void *fh, int event, bool unicast) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, event) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + struct Qdisc *q, u32 parent, + void *fh, bool unicast, bool *last) +{ + struct sk_buff *skb; + u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; + int err; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + err = tp->ops->delete(tp, fh, last); + if (err) { + kfree_skb(skb); + return err; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, + struct Qdisc *q, u32 parent, + struct nlmsghdr *n, + struct tcf_chain *chain, int event) +{ + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) + tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false); +} + /* Add/change/delete/get a filter node */ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -428,7 +722,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct tcf_proto *tp; const struct Qdisc_class_ops *cops; unsigned long cl; - unsigned long fh; + void *fh; int err; int tp_created; @@ -498,7 +792,7 @@ replay: /* Do we search for filter, attached to class? */ if (TC_H_MIN(parent)) { - cl = cops->get(q, parent); + cl = cops->find(q, parent); if (cl == 0) return -ENOENT; } @@ -523,7 +817,8 @@ replay: } if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { - tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); + tfilter_notify_chain(net, skb, q, parent, n, + chain, RTM_DELTFILTER); tcf_chain_flush(chain); err = 0; goto errout; @@ -567,10 +862,10 @@ replay: fh = tp->ops->get(tp, t->tcm_handle); - if (fh == 0) { + if (!fh) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, fh, + tfilter_notify(net, skb, n, tp, q, parent, fh, RTM_DELTFILTER, false); tcf_proto_destroy(tp); err = 0; @@ -595,18 +890,17 @@ replay: } break; case RTM_DELTFILTER: - err = tp->ops->delete(tp, fh, &last); + err = tfilter_del_notify(net, skb, n, tp, q, parent, + fh, false, &last); if (err) goto errout; - tfilter_notify(net, skb, n, tp, t->tcm_handle, - RTM_DELTFILTER, false); if (last) { tcf_chain_tp_remove(chain, &chain_info, tp); tcf_proto_destroy(tp); } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, fh, + err = tfilter_notify(net, skb, n, tp, q, parent, fh, RTM_NEWTFILTER, true); goto errout; default: @@ -620,7 +914,8 @@ replay: if (err == 0) { if (tp_created) tcf_chain_tp_insert(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); + tfilter_notify(net, skb, n, tp, q, parent, fh, + RTM_NEWTFILTER, false); } else { if (tp_created) tcf_proto_destroy(tp); @@ -629,94 +924,33 @@ replay: errout: if (chain) tcf_chain_put(chain); - if (cl) - cops->put(q, cl); if (err == -EAGAIN) /* Replay the request. 
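
tfilter_del_notify() exists because of an ordering constraint: the RTM_DELTFILTER notification must be serialized while the filter still exists, so the message is built first, ops->delete() runs second, and a delete failure just discards the prepared buffer. The previous code notified after deleting, when the filter could no longer be dumped. The shape of the fix, as a sketch:

    #include <stdio.h>
    #include <stdlib.h>

    struct filter { unsigned int handle; };

    static int fill_msg(char *buf, size_t len, const struct filter *f)
    {
        return snprintf(buf, len, "RTM_DELTFILTER handle=%#x", f->handle);
    }

    static int del_notify(struct filter **slot)
    {
        char msg[128];
        int err;

        /* 1. Build the message while the filter is still alive. */
        if (fill_msg(msg, sizeof(msg), *slot) < 0)
            return -1;

        /* 2. Only now run the destructive step. */
        err = 0;            /* stands in for tp->ops->delete(tp, fh, last) */
        if (err)
            return err;     /* the kernel frees the skb and bails here */
        free(*slot);
        *slot = NULL;

        /* 3. Deletion succeeded: send the prepared notification. */
        printf("notify: %s\n", msg);
        return 0;
    }

    int main(void)
    {
        struct filter *f = malloc(sizeof(*f));

        f->handle = 0x8001;
        return del_notify(&f);
    }
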
*/ goto replay; return err; } -static int tcf_fill_node(struct net *net, struct sk_buff *skb, - struct tcf_proto *tp, unsigned long fh, u32 portid, - u32 seq, u16 flags, int event) -{ - struct tcmsg *tcm; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); - if (!nlh) - goto out_nlmsg_trim; - tcm = nlmsg_data(nlh); - tcm->tcm_family = AF_UNSPEC; - tcm->tcm__pad1 = 0; - tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; - tcm->tcm_parent = tp->classid; - tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); - if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) - goto nla_put_failure; - if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) - goto nla_put_failure; - tcm->tcm_handle = fh; - if (RTM_DELTFILTER != event) { - tcm->tcm_handle = 0; - if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) - goto nla_put_failure; - } - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -out_nlmsg_trim: -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int tfilter_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event, bool unicast) -{ - struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, - n->nlmsg_flags, event) <= 0) { - kfree_skb(skb); - return -EINVAL; - } - - if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); - - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); -} - struct tcf_dump_args { struct tcf_walker w; struct sk_buff *skb; struct netlink_callback *cb; + struct Qdisc *q; + u32 parent; }; -static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, - struct tcf_walker *arg) +static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_dump_args *a = (void *)arg; struct net *net = sock_net(a->skb->sk); - return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, + return tcf_fill_node(net, a->skb, tp, a->q, a->parent, + n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } -static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, - struct netlink_callback *cb, +static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, + struct sk_buff *skb, struct netlink_callback *cb, long index_start, long *p_index) { struct net *net = sock_net(skb->sk); @@ -738,7 +972,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(net, skb, tp, 0, + if (tcf_fill_node(net, skb, tp, q, parent, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) @@ -751,6 +985,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, arg.w.fn = tcf_node_dump; arg.skb = skb; arg.cb = cb; + arg.q = q; + arg.parent = parent; arg.w.stop = 0; arg.w.skip = cb->args[1] - 1; arg.w.count = 0; @@ -776,6 +1012,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; long index_start; long index; + u32 parent; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) @@ -789,25 +1026,28 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (!dev) return 
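
tcf_chain_dump() and tcf_node_dump() above keep multi-part dumps resumable: cb->args[] records how many entries earlier calls already emitted, the walker skips that many, and the updated count is stored back when the skb fills. The generic skip/count pattern, reduced to a few lines:

    #include <stdio.h>

    #define BATCH 3

    /* Emit up to BATCH items per call, resuming at *cursor; returns the
     * number emitted, like a netlink dump callback refilling one skb. */
    static int dump_batch(const int *items, int n, int *cursor)
    {
        int emitted = 0;

        for (int i = *cursor; i < n && emitted < BATCH; i++, emitted++)
            printf("item %d\n", items[i]);
        *cursor += emitted;
        return emitted;
    }

    int main(void)
    {
        int items[] = { 1, 2, 3, 4, 5, 6, 7 }, cursor = 0;

        while (dump_batch(items, 7, &cursor))    /* each loop = one dump call */
            printf("-- skb full, next call resumes at %d --\n", cursor);
        return 0;
    }
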
skb->len; - if (!tcm->tcm_parent) + parent = tcm->tcm_parent; + if (!parent) { q = dev->qdisc; - else + parent = q->handle; + } else { q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); + } if (!q) goto out; cops = q->ops->cl_ops; if (!cops) - goto errout; + goto out; if (!cops->tcf_block) - goto errout; + goto out; if (TC_H_MIN(tcm->tcm_parent)) { - cl = cops->get(q, tcm->tcm_parent); + cl = cops->find(q, tcm->tcm_parent); if (cl == 0) - goto errout; + goto out; } block = cops->tcf_block(q, cl); if (!block) - goto errout; + goto out; index_start = cb->args[0]; index = 0; @@ -816,15 +1056,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; - if (!tcf_chain_dump(chain, skb, cb, index_start, &index)) + if (!tcf_chain_dump(chain, q, parent, skb, cb, + index_start, &index)) break; } cb->args[0] = index; -errout: - if (cl) - cops->put(q, cl); out: return skb->len; } @@ -834,6 +1072,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT LIST_HEAD(actions); + ASSERT_RTNL(); tcf_exts_to_list(exts, &actions); tcf_action_destroy(&actions, TCA_ACT_UNBIND); kfree(exts->actions); @@ -872,6 +1111,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, exts->actions[i++] = act; exts->nr_actions = i; } + exts->net = net; } #else if ((exts->action && tb[exts->action]) || @@ -883,18 +1123,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } EXPORT_SYMBOL(tcf_exts_validate); -void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, - struct tcf_exts *src) +void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT struct tcf_exts old = *dst; - tcf_tree_lock(tp); - dst->nr_actions = src->nr_actions; - dst->actions = src->actions; - dst->type = src->type; - tcf_tree_unlock(tp); - + *dst = *src; tcf_exts_destroy(&old); #endif } @@ -915,7 +1149,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; - if (exts->action && exts->nr_actions) { + if (exts->action && tcf_exts_has_actions(exts)) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering @@ -965,36 +1199,67 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, - struct net_device **hw_dev) +static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, + enum tc_setup_type type, + void *type_data, bool err_stop) { + int ok_count = 0; #ifdef CONFIG_NET_CLS_ACT const struct tc_action *a; - LIST_HEAD(actions); + struct net_device *dev; + int i, ret; - if (tc_no_actions(exts)) - return -EINVAL; + if (!tcf_exts_has_actions(exts)) + return 0; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { - if (a->ops->get_dev) { - a->ops->get_dev(a, dev_net(dev), hw_dev); - break; - } + for (i = 0; i < exts->nr_actions; i++) { + a = exts->actions[i]; + if (!a->ops->get_dev) + continue; + dev = a->ops->get_dev(a); + if (!dev) + continue; + ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count += ret; } - if (*hw_dev) - return 0; #endif - return -EOPNOTSUPP; + return ok_count; +} + +int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, + enum tc_setup_type type, void *type_data, bool err_stop) +{ + int ok_count; + int ret; + + ret 
= tcf_block_cb_call(block, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count = ret; + + if (!exts) + return ok_count; + ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count += ret; + + return ok_count; } -EXPORT_SYMBOL(tcf_exts_get_dev); +EXPORT_SYMBOL(tc_setup_cb_call); static int __init tc_filter_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL); + tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); + if (!tc_filter_wq) + return -ENOMEM; + + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, - tc_dump_tfilter, NULL); + tc_dump_tfilter, 0); return 0; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index c4fd63a068f9..5f169ded347e 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -17,13 +17,14 @@ #include <linux/errno.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> +#include <linux/idr.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> struct basic_head { - u32 hgenerator; struct list_head flist; + struct idr handle_idr; struct rcu_head rcu; }; @@ -34,7 +35,10 @@ struct basic_filter { struct tcf_result res; struct tcf_proto *tp; struct list_head link; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -56,20 +60,18 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static unsigned long basic_get(struct tcf_proto *tp, u32 handle) +static void *basic_get(struct tcf_proto *tp, u32 handle) { - unsigned long l = 0UL; struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { - l = (unsigned long) f; - break; + return f; } } - return l; + return NULL; } static int basic_init(struct tcf_proto *tp) @@ -80,19 +82,36 @@ static int basic_init(struct tcf_proto *tp) if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->flist); + idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; } -static void basic_delete_filter(struct rcu_head *head) +static void __basic_delete_filter(struct basic_filter *f) { - struct basic_filter *f = container_of(head, struct basic_filter, rcu); - tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); kfree(f); } +static void basic_delete_filter_work(struct work_struct *work) +{ + struct basic_filter *f = container_of(work, struct basic_filter, work); + + rtnl_lock(); + __basic_delete_filter(f); + rtnl_unlock(); +} + +static void basic_delete_filter(struct rcu_head *head) +{ + struct basic_filter *f = container_of(head, struct basic_filter, rcu); + + INIT_WORK(&f->work, basic_delete_filter_work); + tcf_queue_work(&f->work); +} + static void basic_destroy(struct tcf_proto *tp) { struct basic_head *head = rtnl_dereference(tp->root); @@ -101,18 +120,25 @@ static void basic_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, basic_delete_filter); + idr_remove_ext(&head->handle_idr, f->handle); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, basic_delete_filter); + else + 
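
cls_basic introduces the destruction pattern every classifier in this series adopts: the filter embeds a union of a work_struct and an rcu_head, the RCU callback does nothing but queue work on the new ordered tc_filter_wq, and the work item takes rtnl_lock() before freeing, because tcf_exts_destroy() can sleep and therefore must not run from the RCU softirq. A pthread analogue of that two-stage handoff (the direct thread hand-off stands in for the grace period; link with -lpthread):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER;   /* rtnl_lock() analogue */

    struct filter {
        int handle;
        union {                 /* same trick as struct basic_filter */
            struct filter *work_next;    /* stage 2: worker queue linkage */
            long rcu_pad;                /* stage 1: the rcu_head lived here */
        };
    };

    static void *worker(void *arg)       /* tc_filter_wq analogue */
    {
        struct filter *f = arg;

        pthread_mutex_lock(&rtnl);       /* safe to sleep here, unlike in an RCU cb */
        printf("freeing filter %d under the big lock\n", f->handle);
        free(f);
        pthread_mutex_unlock(&rtnl);
        return NULL;
    }

    static void delete_filter(struct filter *f)
    {
        pthread_t t;

        /* Kernel order: call_rcu() runs first; its callback only does
         * INIT_WORK() + tcf_queue_work(). Here we skip to the work stage. */
        pthread_create(&t, NULL, worker, f);
        pthread_join(t, NULL);
    }

    int main(void)
    {
        struct filter *f = calloc(1, sizeof(*f));

        f->handle = 1;
        delete_filter(f);
        return 0;
    }
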
__basic_delete_filter(f); } + idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } -static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) { struct basic_head *head = rtnl_dereference(tp->root); - struct basic_filter *f = (struct basic_filter *) arg; + struct basic_filter *f = arg; list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + idr_remove_ext(&head->handle_idr, f->handle); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, basic_delete_filter); *last = list_empty(&head->flist); return 0; @@ -129,44 +155,34 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr *est, bool ovr) { int err; - struct tcf_exts e; - struct tcf_ematch_tree t; - err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t); + err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches); if (err < 0) - goto errout; + return err; if (tb[TCA_BASIC_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]); tcf_bind_filter(tp, &f->res, base); } - tcf_exts_change(tp, &f->exts, &e); - tcf_em_tree_change(tp, &f->ematches, &t); f->tp = tp; - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { int err; struct basic_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_BASIC_MAX + 1]; struct basic_filter *fold = (struct basic_filter *) *arg; struct basic_filter *fnew; + unsigned long idr_index; if (tca[TCA_OPTIONS] == NULL) return -EINVAL; @@ -189,35 +205,36 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = -EINVAL; if (handle) { fnew->handle = handle; - } else if (fold) { - fnew->handle = fold->handle; + if (!fold) { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (err) + goto errout; + } } else { - unsigned int i = 0x80000000; - do { - if (++head->hgenerator == 0x7FFFFFFF) - head->hgenerator = 1; - } while (--i > 0 && basic_get(tp, head->hgenerator)); - - if (i <= 0) { - pr_err("Insufficient number of handles\n"); + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + 1, 0x7FFFFFFF, GFP_KERNEL); + if (err) goto errout; - } - - fnew->handle = head->hgenerator; + fnew->handle = idr_index; } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); - if (err < 0) + if (err < 0) { + if (!fold) + idr_remove_ext(&head->handle_idr, fnew->handle); goto errout; + } - *arg = (unsigned long)fnew; + *arg = fnew; if (fold) { + idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, basic_delete_filter); } else { list_add_rcu(&fnew->link, &head->flist); @@ -239,7 +256,7 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } @@ -248,10 +265,18 @@ skip: } } -static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, 
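
basic_change() also shows the two allocation modes the IDR gives for free: a user-supplied handle is reserved with the [handle, handle + 1) range and fails if already taken, while handle 0 means "allocate the next free id in [1, 0x7FFFFFFF)", replacing the old hgenerator retry loop entirely. The two modes in a bitmap sketch (illustrative, not the idr_alloc_ext() API):

    #include <stdio.h>

    #define NR_IDS 64
    static unsigned char used[NR_IDS];

    /* want == 0: pick the lowest free id >= 1; else reserve exactly `want`. */
    static int handle_alloc(unsigned int want)
    {
        if (want) {
            if (want >= NR_IDS || used[want])
                return -1;    /* -ENOSPC: already reserved */
            used[want] = 1;
            return want;
        }
        for (unsigned int id = 1; id < NR_IDS; id++)
            if (!used[id]) {
                used[id] = 1;
                return id;
            }
        return -1;
    }

    int main(void)
    {
        printf("auto:      %d\n", handle_alloc(0));    /* 1 */
        printf("explicit:  %d\n", handle_alloc(7));    /* 7 */
        printf("collision: %d\n", handle_alloc(7));    /* -1 */
        return 0;
    }
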
+static void basic_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct basic_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + +static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct basic_filter *f = (struct basic_filter *) fh; + struct basic_filter *f = fh; struct nlattr *nest; if (f == NULL) @@ -293,6 +318,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = { .delete = basic_delete, .walk = basic_walk, .dump = basic_dump, + .bind_class = basic_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index f57bd531ba98..a9f3e317055c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -17,6 +17,7 @@ #include <linux/skbuff.h> #include <linux/filter.h> #include <linux/bpf.h> +#include <linux/idr.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> @@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier"); struct cls_bpf_head { struct list_head plist; - u32 hgen; + struct idr handle_idr; struct rcu_head rcu; }; @@ -49,7 +50,10 @@ struct cls_bpf_prog { struct sock_filter *bpf_ops; const char *bpf_name; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { @@ -99,11 +103,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); } @@ -146,35 +150,39 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_bpf_offload bpf_offload = {}; - struct tc_to_netdev offload; + bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE; + struct tcf_block *block = tp->chain->block; + bool skip_sw = tc_skip_sw(prog->gen_flags); + struct tc_cls_bpf_offload cls_bpf = {}; int err; - offload.type = TC_SETUP_CLSBPF; - offload.cls_bpf = &bpf_offload; - - bpf_offload.command = cmd; - bpf_offload.exts = &prog->exts; - bpf_offload.prog = prog->filter; - bpf_offload.name = prog->bpf_name; - bpf_offload.exts_integrated = prog->exts_integrated; - bpf_offload.gen_flags = prog->gen_flags; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, - tp->protocol, &offload); + tc_cls_common_offload_init(&cls_bpf.common, tp); + cls_bpf.command = cmd; + cls_bpf.exts = &prog->exts; + cls_bpf.prog = prog->filter; + cls_bpf.name = prog->bpf_name; + cls_bpf.exts_integrated = prog->exts_integrated; + cls_bpf.gen_flags = prog->gen_flags; + + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); + if (addorrep) { + if (err < 0) { + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + return err; + } else if (err > 0) { + prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + } + } - if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) - prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; - return err; + return 0; } static int cls_bpf_offload(struct tcf_proto 
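
The new bind_class op seen here (basic_bind_class(), and cls_bpf_bind_class() further down) is the classifier half of the class get/put removal elsewhere in this series: when a qdisc class is replaced, the qdisc walks each classifier's filters and re-points res.class wherever res.classid matches. The walk a qdisc would drive, sketched with illustrative types:

    #include <stdio.h>

    struct tcf_result { unsigned int classid; unsigned long class; };
    struct filter { struct tcf_result res; struct filter *next; };

    /* Same shape as basic_bind_class() / cls_bpf_bind_class(). */
    static void bind_class(struct filter *f, unsigned int classid,
                           unsigned long cl)
    {
        if (f && f->res.classid == classid)
            f->res.class = cl;
    }

    int main(void)
    {
        struct filter b = { { 20, 0 }, NULL };
        struct filter a = { { 10, 0 }, &b };

        /* Class 10 was replaced: rebind every filter pointing at it. */
        for (struct filter *f = &a; f; f = f->next)
            bind_class(f, 10, 0xdeadUL);
        printf("a.class=%#lx b.class=%#lx\n", a.res.class, b.res.class);
        return 0;
    }
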
*tp, struct cls_bpf_prog *prog, struct cls_bpf_prog *oldprog) { - struct net_device *dev = tp->q->dev_queue->dev; struct cls_bpf_prog *obj = prog; enum tc_clsbpf_command cmd; bool skip_sw; @@ -184,7 +192,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, (oldprog && tc_skip_sw(oldprog->gen_flags)); if (oldprog && oldprog->offloaded) { - if (tc_should_offload(dev, tp, prog->gen_flags)) { + if (!tc_skip_hw(prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; } else if (!tc_skip_sw(prog->gen_flags)) { obj = oldprog; @@ -193,14 +201,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, return -EINVAL; } } else { - if (!tc_should_offload(dev, tp, prog->gen_flags)) + if (tc_skip_hw(prog->gen_flags)) return skip_sw ? -EINVAL : 0; cmd = TC_CLSBPF_ADD; } ret = cls_bpf_offload_cmd(tp, obj, cmd); if (ret) - return skip_sw ? ret : 0; + return ret; obj->offloaded = true; if (oldprog) @@ -244,6 +252,7 @@ static int cls_bpf_init(struct tcf_proto *tp) return -ENOBUFS; INIT_LIST_HEAD_RCU(&head->plist); + idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; @@ -252,6 +261,7 @@ static int cls_bpf_init(struct tcf_proto *tp) static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); @@ -263,24 +273,42 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) kfree(prog); } +static void cls_bpf_delete_prog_work(struct work_struct *work) +{ + struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work); + + rtnl_lock(); + __cls_bpf_delete_prog(prog); + rtnl_unlock(); +} + static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) { - __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu)); + struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); + + INIT_WORK(&prog->work, cls_bpf_delete_prog_work); + tcf_queue_work(&prog->work); } static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) { + struct cls_bpf_head *head = rtnl_dereference(tp->root); + + idr_remove_ext(&head->handle_idr, prog->handle); cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + if (tcf_exts_get_net(&prog->exts)) + call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + else + __cls_bpf_delete_prog(prog); } -static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) { struct cls_bpf_head *head = rtnl_dereference(tp->root); - __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg); + __cls_bpf_delete(tp, arg); *last = list_empty(&head->plist); return 0; } @@ -293,23 +321,21 @@ static void cls_bpf_destroy(struct tcf_proto *tp) list_for_each_entry_safe(prog, tmp, &head->plist, link) __cls_bpf_delete(tp, prog); + idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } -static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) +static void *cls_bpf_get(struct tcf_proto *tp, u32 handle) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; - unsigned long ret = 0UL; list_for_each_entry(prog, &head->plist, link) { - if (prog->handle == handle) { - ret = (unsigned long) prog; - break; - } + if (prog->handle == handle) + return prog; } - return ret; + return NULL; } static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) @@ -352,15 +378,17 @@ static int 
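
cls_bpf_offload_cmd() now funnels through tc_setup_cb_call() and derives its verdict from the aggregate result: a negative return on add/replace tears down whatever was partially installed, a positive count marks the program TCA_CLS_FLAGS_IN_HW, and skip_sw with nothing in hardware is rejected outright. That decision table, condensed into a sketch:

    #include <stdio.h>

    #define IN_HW   0x1
    #define SKIP_SW 0x2

    /* Condensed from cls_bpf_offload_cmd(): `cb_ret` is what
     * tc_setup_cb_call() returned for an add/replace command. */
    static int offload_verdict(int cb_ret, unsigned int *gen_flags)
    {
        if (cb_ret < 0)
            return cb_ret;          /* caller also issues TC_CLSBPF_DESTROY */
        if (cb_ret > 0)
            *gen_flags |= IN_HW;    /* at least one device took the prog */
        if ((*gen_flags & SKIP_SW) && !(*gen_flags & IN_HW))
            return -22;             /* -EINVAL: hw-only filter, no hw */
        return 0;
    }

    int main(void)
    {
        unsigned int flags = SKIP_SW;

        printf("no cb, skip_sw:  %d\n", offload_verdict(0, &flags));
        printf("one cb, skip_sw: %d (flags=%#x)\n",
               offload_verdict(1, &flags), flags);
        return 0;
    }
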
cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) } static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, - const struct tcf_proto *tp) + u32 gen_flags, const struct tcf_proto *tp) { struct bpf_prog *fp; char *name = NULL; + bool skip_sw; u32 bpf_fd; bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); + skip_sw = gen_flags & TCA_CLS_FLAGS_SKIP_SW; - fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); + fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, skip_sw); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -382,13 +410,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, return 0; } -static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, - struct cls_bpf_prog *prog, - unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) +static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, + struct cls_bpf_prog *prog, unsigned long base, + struct nlattr **tb, struct nlattr *est, bool ovr) { bool is_bpf, is_ebpf, have_exts = false; - struct tcf_exts exts; u32 gen_flags = 0; int ret; @@ -397,83 +423,51 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr); if (ret < 0) return ret; - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); - if (ret < 0) - goto errout; if (tb[TCA_BPF_FLAGS]) { u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); - if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { - ret = -EINVAL; - goto errout; - } + if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) + return -EINVAL; have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } if (tb[TCA_BPF_FLAGS_GEN]) { gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || - !tc_flags_valid(gen_flags)) { - ret = -EINVAL; - goto errout; - } + !tc_flags_valid(gen_flags)) + return -EINVAL; } prog->exts_integrated = have_exts; prog->gen_flags = gen_flags; ret = is_bpf ? 
cls_bpf_prog_from_ops(tb, prog) : - cls_bpf_prog_from_efd(tb, prog, tp); + cls_bpf_prog_from_efd(tb, prog, gen_flags, tp); if (ret < 0) - goto errout; + return ret; if (tb[TCA_BPF_CLASSID]) { prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); tcf_bind_filter(tp, &prog->res, base); } - tcf_exts_change(tp, &prog->exts, &exts); return 0; - -errout: - tcf_exts_destroy(&exts); - return ret; -} - -static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, - struct cls_bpf_head *head) -{ - unsigned int i = 0x80000000; - u32 handle; - - do { - if (++head->hgen == 0x7FFFFFFF) - head->hgen = 1; - } while (--i > 0 && cls_bpf_get(tp, head->hgen)); - - if (unlikely(i == 0)) { - pr_err("Insufficient number of handles\n"); - handle = 0; - } else { - handle = head->hgen; - } - - return handle; } static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_bpf_head *head = rtnl_dereference(tp->root); - struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg; + struct cls_bpf_prog *oldprog = *arg; struct nlattr *tb[TCA_BPF_MAX + 1]; struct cls_bpf_prog *prog; + unsigned long idr_index; int ret; if (tca[TCA_OPTIONS] == NULL) @@ -499,22 +493,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, } } - if (handle == 0) - prog->handle = cls_bpf_grab_new_handle(tp, head); - else + if (handle == 0) { + ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, + 1, 0x7FFFFFFF, GFP_KERNEL); + if (ret) + goto errout; + prog->handle = idr_index; + } else { + if (!oldprog) { + ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (ret) + goto errout; + } prog->handle = handle; - if (prog->handle == 0) { - ret = -EINVAL; - goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], - ovr); + ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); if (ret < 0) - goto errout; + goto errout_idr; ret = cls_bpf_offload(tp, prog, oldprog); if (ret) { + if (!oldprog) + idr_remove_ext(&head->handle_idr, prog->handle); __cls_bpf_delete_prog(prog); return ret; } @@ -523,16 +525,21 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; if (oldprog) { + idr_replace_ext(&head->handle_idr, prog, handle); list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); + tcf_exts_get_net(&oldprog->exts); call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); } else { list_add_rcu(&prog->link, &head->plist); } - *arg = (unsigned long) prog; + *arg = prog; return 0; +errout_idr: + if (!oldprog) + idr_remove_ext(&head->handle_idr, prog->handle); errout: tcf_exts_destroy(&prog->exts); kfree(prog); @@ -578,10 +585,10 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, return 0; } -static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *tm) { - struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; + struct cls_bpf_prog *prog = fh; struct nlattr *nest; u32 bpf_flags = 0; int ret; @@ -631,6 +638,14 @@ nla_put_failure: return -1; } +static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_bpf_prog *prog = fh; + + if (prog && prog->res.classid == classid) + prog->res.class = cl; +} + static void cls_bpf_walk(struct tcf_proto *tp, struct 
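
The errout_idr label added to cls_bpf_change() encodes the unwind rule the IDR conversion needs: release the handle on failure only when this call allocated it (!oldprog); a failed replace must never free the handle still owned by the live filter. The conditional rollback, reduced to its skeleton:

    #include <stdio.h>

    static int alloc_handle(int h)  { printf("reserve %d\n", h); return 0; }
    static void free_handle(int h)  { printf("release %d\n", h); }
    static int set_parms(int fail)  { return fail ? -22 : 0; }

    static int change(int handle, int replacing, int fail_parms)
    {
        int err;

        if (!replacing) {            /* only a fresh insert owns the handle */
            err = alloc_handle(handle);
            if (err)
                return err;
        }

        err = set_parms(fail_parms);
        if (err)
            goto errout_idr;

        printf("installed %d\n", handle);
        return 0;

    errout_idr:
        if (!replacing)              /* never steal a live filter's handle */
            free_handle(handle);
        return err;
    }

    int main(void)
    {
        change(5, 0, 1);    /* fresh insert fails: handle released */
        change(5, 1, 1);    /* failed replace: handle left alone */
        return 0;
    }
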
tcf_walker *arg) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -639,7 +654,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry(prog, &head->plist, link) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) prog, arg) < 0) { + if (arg->fn(tp, prog, arg) < 0) { arg->stop = 1; break; } @@ -659,6 +674,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = { .delete = cls_bpf_delete, .walk = cls_bpf_walk, .dump = cls_bpf_dump, + .bind_class = cls_bpf_bind_class, }; static int __init cls_bpf_init_mod(void) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 12ce547eea04..309d5899265f 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -23,7 +23,10 @@ struct cls_cgroup_head { struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -43,9 +46,9 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, return tcf_exts_exec(skb, &head->exts, res); } -static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) +static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle) { - return 0UL; + return NULL; } static int cls_cgroup_init(struct tcf_proto *tp) @@ -57,27 +60,42 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void __cls_cgroup_destroy(struct cls_cgroup_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_em_tree_destroy(&head->ematches); + tcf_exts_put_net(&head->exts); + kfree(head); +} + +static void cls_cgroup_destroy_work(struct work_struct *work) +{ + struct cls_cgroup_head *head = container_of(work, + struct cls_cgroup_head, + work); + rtnl_lock(); + __cls_cgroup_destroy(head); + rtnl_unlock(); +} + static void cls_cgroup_destroy_rcu(struct rcu_head *root) { struct cls_cgroup_head *head = container_of(root, struct cls_cgroup_head, rcu); - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); - kfree(head); + INIT_WORK(&head->work, cls_cgroup_destroy_work); + tcf_queue_work(&head->work); } static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct cls_cgroup_head *new; - struct tcf_ematch_tree t; - struct tcf_exts e; int err; if (!tca[TCA_OPTIONS]) @@ -103,27 +121,19 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr); if (err < 0) goto errout; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) { - tcf_exts_destroy(&e); - goto errout; - } - err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); - if (err < 0) { - tcf_exts_destroy(&e); + err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches); + if (err < 0) goto errout; - } - - tcf_exts_change(tp, &new->exts, &e); - tcf_em_tree_change(tp, &new->ematches, &t); rcu_assign_pointer(tp->root, new); - if (head) + if (head) { + tcf_exts_get_net(&head->exts); call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + } return 0; errout: 
tcf_exts_destroy(&new->exts); @@ -136,11 +146,15 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). */ - if (head) - call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + if (head) { + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + else + __cls_cgroup_destroy(head); + } } -static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) { return -EOPNOTSUPP; } @@ -152,7 +166,7 @@ static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) head, arg) < 0) { + if (arg->fn(tp, head, arg) < 0) { arg->stop = 1; return; } @@ -160,7 +174,7 @@ skip: arg->count++; } -static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 3065752b9cda..25c2a888e1f0 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -57,7 +57,10 @@ struct flow_filter { u32 divisor; u32 baseclass; u32 hashrnd; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline u32 addr_fold(void *addr) @@ -345,9 +348,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static void flow_perturbation(unsigned long arg) +static void flow_perturbation(struct timer_list *t) { - struct flow_filter *f = (struct flow_filter *)arg; + struct flow_filter *f = from_timer(f, t, perturb_timer); get_random_bytes(&f->hashrnd, 4); if (f->perturb_period) @@ -369,27 +372,41 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static void flow_destroy_filter(struct rcu_head *head) +static void __flow_destroy_filter(struct flow_filter *f) { - struct flow_filter *f = container_of(head, struct flow_filter, rcu); - del_timer_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); kfree(f); } +static void flow_destroy_filter_work(struct work_struct *work) +{ + struct flow_filter *f = container_of(work, struct flow_filter, work); + + rtnl_lock(); + __flow_destroy_filter(f); + rtnl_unlock(); +} + +static void flow_destroy_filter(struct rcu_head *head) +{ + struct flow_filter *f = container_of(head, struct flow_filter, rcu); + + INIT_WORK(&f->work, flow_destroy_filter_work); + tcf_queue_work(&f->work); +} + static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FLOW_MAX + 1]; - struct tcf_exts e; - struct tcf_ematch_tree t; unsigned int nkeys = 0; unsigned int perturb_period = 0; u32 baseclass = 0; @@ -425,31 +442,27 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); - if (err < 0) - goto err1; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) - goto err1; + fnew 
= kzalloc(sizeof(*fnew), GFP_KERNEL); + if (!fnew) + return -ENOBUFS; - err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); + err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches); if (err < 0) goto err1; - err = -ENOBUFS; - fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); - if (!fnew) + err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) goto err2; - err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr); if (err < 0) - goto err3; + goto err2; - fold = (struct flow_filter *)*arg; + fold = *arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) - goto err3; + goto err2; /* Copy fold into fnew */ fnew->tp = fold->tp; @@ -469,36 +482,39 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err3; + goto err2; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err3; + goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) - goto err3; + goto err2; if (!tb[TCA_FLOW_KEYS]) - goto err3; + goto err2; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err3; + goto err2; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err3; + goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } - if (TC_H_MAJ(baseclass) == 0) - baseclass = TC_H_MAKE(tp->q->handle, baseclass); + if (TC_H_MAJ(baseclass) == 0) { + struct Qdisc *q = tcf_block_q(tp->chain->block); + + baseclass = TC_H_MAKE(q->handle, baseclass); + } if (TC_H_MIN(baseclass) == 0) baseclass = TC_H_MAKE(baseclass, 1); @@ -508,11 +524,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, get_random_bytes(&fnew->hashrnd, 4); } - setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, - (unsigned long)fnew); - - tcf_exts_change(tp, &fnew->exts, &e); - tcf_em_tree_change(tp, &fnew->ematches, &t); + timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE); netif_keep_dst(qdisc_dev(tp->q)); @@ -541,33 +553,34 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (perturb_period) mod_timer(&fnew->perturb_timer, jiffies + perturb_period); - if (*arg == 0) + if (!*arg) list_add_tail_rcu(&fnew->list, &head->filters); else list_replace_rcu(&fold->list, &fnew->list); - *arg = (unsigned long)fnew; + *arg = fnew; - if (fold) + if (fold) { + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, flow_destroy_filter); + } return 0; -err3: - tcf_exts_destroy(&fnew->exts); err2: - tcf_em_tree_destroy(&t); - kfree(fnew); + tcf_exts_destroy(&fnew->exts); + tcf_em_tree_destroy(&fnew->ematches); err1: - tcf_exts_destroy(&e); + kfree(fnew); return err; } -static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) { struct flow_head *head = rtnl_dereference(tp->root); - struct flow_filter *f = (struct flow_filter *)arg; + struct flow_filter *f = arg; list_del_rcu(&f->list); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, flow_destroy_filter); *last = list_empty(&head->filters); return 0; @@ -592,26 +605,29 @@ static void flow_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); - 
call_rcu(&f->rcu, flow_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, flow_destroy_filter); + else + __flow_destroy_filter(f); } kfree_rcu(head, rcu); } -static unsigned long flow_get(struct tcf_proto *tp, u32 handle) +static void *flow_get(struct tcf_proto *tp, u32 handle) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; list_for_each_entry(f, &head->filters, list) if (f->handle == handle) - return (unsigned long)f; - return 0; + return f; + return NULL; } -static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct flow_filter *f = (struct flow_filter *)fh; + struct flow_filter *f = fh; struct nlattr *nest; if (f == NULL) @@ -677,7 +693,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry(f, &head->filters, list) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7832eb93379b..543a3e875d05 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -68,7 +68,6 @@ struct cls_fl_head { struct rhashtable ht; struct fl_flow_mask mask; struct flow_dissector dissector; - u32 hgen; bool mask_assigned; struct list_head filters; struct rhashtable_params ht_params; @@ -76,6 +75,7 @@ struct cls_fl_head { struct work_struct work; struct rcu_head rcu; }; + struct idr handle_idr; }; struct cls_fl_filter { @@ -87,8 +87,10 @@ struct cls_fl_filter { struct list_head list; u32 handle; u32 flags; - struct rcu_head rcu; - struct tc_to_netdev tc; + union { + struct work_struct work; + struct rcu_head rcu; + }; struct net_device *hw_dev; }; @@ -153,37 +155,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; - struct ip_tunnel_info *info; if (!atomic_read(&head->ht.nelems)) return -1; fl_clear_masked_range(&skb_key, &head->mask); - info = skb_tunnel_info(skb); - if (info) { - struct ip_tunnel_key *key = &info->key; - - switch (ip_tunnel_info_af(info)) { - case AF_INET: - skb_key.enc_control.addr_type = - FLOW_DISSECTOR_KEY_IPV4_ADDRS; - skb_key.enc_ipv4.src = key->u.ipv4.src; - skb_key.enc_ipv4.dst = key->u.ipv4.dst; - break; - case AF_INET6: - skb_key.enc_control.addr_type = - FLOW_DISSECTOR_KEY_IPV6_ADDRS; - skb_key.enc_ipv6.src = key->u.ipv6.src; - skb_key.enc_ipv6.dst = key->u.ipv6.dst; - break; - } - - skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); - skb_key.enc_tp.src = key->tp_src; - skb_key.enc_tp.dst = key->tp_dst; - } - skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, * so do it rather here. 
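A recurring conversion in this series: teardown that used to run entirely inside a call_rcu() callback is split into two stages, because releasing tcf_exts (and the actions hanging off them) requires the RTNL lock, which an RCU callback running in softirq context cannot take. The RCU callback now merely queues a work item via tcf_queue_work(), and the work function reacquires RTNL before freeing; tcf_exts_get_net() decides whether the deferral is needed at all. Below is a condensed sketch of that shape under hypothetical names (my_filter and friends); the real instances are flow_destroy_filter()/flow_destroy_filter_work() and the cls_cgroup and cls_flower equivalents above.

struct my_filter {
	struct tcf_exts exts;
	union {				/* never live at the same time */
		struct work_struct work;
		struct rcu_head rcu;
	};
};

static void my_filter_free(struct my_filter *f)
{
	tcf_exts_destroy(&f->exts);	/* may release actions: wants RTNL */
	tcf_exts_put_net(&f->exts);
	kfree(f);
}

static void my_filter_work(struct work_struct *work)
{
	struct my_filter *f = container_of(work, struct my_filter, work);

	rtnl_lock();
	my_filter_free(f);
	rtnl_unlock();
}

static void my_filter_rcu(struct rcu_head *head)
{
	struct my_filter *f = container_of(head, struct my_filter, rcu);

	/* softirq context here: bounce the actual free to process context */
	INIT_WORK(&f->work, my_filter_work);
	tcf_queue_work(&f->work);
}

static void my_filter_delete(struct my_filter *f)
{
	/* defer only while the netns still holds the exts; otherwise the
	 * netns is already being dismantled and a synchronous free is safe
	 */
	if (tcf_exts_get_net(&f->exts))
		call_rcu(&f->rcu, my_filter_rcu);
	else
		my_filter_free(f);
}

Overlaying work_struct and rcu_head in a union is safe precisely because of this sequencing: the rcu_head is no longer referenced once the RCU callback runs, so the same storage can be reused for the work item.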
@@ -211,36 +188,46 @@ static int fl_init(struct tcf_proto *tp) INIT_LIST_HEAD_RCU(&head->filters); rcu_assign_pointer(tp->root, head); + idr_init(&head->handle_idr); return 0; } -static void fl_destroy_filter(struct rcu_head *head) +static void __fl_destroy_filter(struct cls_fl_filter *f) { - struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); - tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); kfree(f); } -static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) +static void fl_destroy_filter_work(struct work_struct *work) { - struct tc_cls_flower_offload offload = {0}; - struct net_device *dev = f->hw_dev; - struct tc_to_netdev *tc = &f->tc; + struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); - if (!tc_can_offload(dev, tp)) - return; + rtnl_lock(); + __fl_destroy_filter(f); + rtnl_unlock(); +} - offload.command = TC_CLSFLOWER_DESTROY; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + INIT_WORK(&f->work, fl_destroy_filter_work); + tcf_queue_work(&f->work); +} + +static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) +{ + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = tp->chain->block; - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_DESTROY; + cls_flower.cookie = (unsigned long) f; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, - tp->protocol, tc); + tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, false); } static int fl_hw_replace_filter(struct tcf_proto *tp, @@ -248,72 +235,63 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, struct fl_flow_key *mask, struct cls_fl_filter *f) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_flower_offload offload = {0}; - struct tc_to_netdev *tc = &f->tc; + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = tp->chain->block; + bool skip_sw = tc_skip_sw(f->flags); int err; - if (!tc_can_offload(dev, tp)) { - if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) || - (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) { - f->hw_dev = dev; - return tc_skip_sw(f->flags) ? 
-EINVAL : 0; - } - dev = f->hw_dev; - tc->egress_dev = true; - } else { - f->hw_dev = dev; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_REPLACE; + cls_flower.cookie = (unsigned long) f; + cls_flower.dissector = dissector; + cls_flower.mask = mask; + cls_flower.key = &f->mkey; + cls_flower.exts = &f->exts; + cls_flower.classid = f->res.classid; + + err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, skip_sw); + if (err < 0) { + fl_hw_destroy_filter(tp, f); + return err; + } else if (err > 0) { + f->flags |= TCA_CLS_FLAGS_IN_HW; } - offload.command = TC_CLSFLOWER_REPLACE; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; - offload.dissector = dissector; - offload.mask = mask; - offload.key = &f->mkey; - offload.exts = &f->exts; - - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, tc); - if (!err) - f->flags |= TCA_CLS_FLAGS_IN_HW; + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; - if (tc_skip_sw(f->flags)) - return err; return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) { - struct tc_cls_flower_offload offload = {0}; - struct net_device *dev = f->hw_dev; - struct tc_to_netdev *tc = &f->tc; - - if (!tc_can_offload(dev, tp)) - return; - - offload.command = TC_CLSFLOWER_STATS; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; - offload.exts = &f->exts; + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = tp->chain->block; - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_STATS; + cls_flower.cookie = (unsigned long) f; + cls_flower.exts = &f->exts; + cls_flower.classid = f->res.classid; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, tc); + tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, false); } static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) { + struct cls_fl_head *head = rtnl_dereference(tp->root); + + idr_remove_ext(&head->handle_idr, f->handle); list_del_rcu(&f->list); if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fl_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fl_destroy_filter); + else + __fl_destroy_filter(f); } static void fl_destroy_sleepable(struct work_struct *work) @@ -341,20 +319,17 @@ static void fl_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) __fl_delete(tp, f); + idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); call_rcu(&head->rcu, fl_destroy_rcu); } -static unsigned long fl_get(struct tcf_proto *tp, u32 handle) +static void *fl_get(struct tcf_proto *tp, u32 handle) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f; - list_for_each_entry(f, &head->filters, list) - if (f->handle == handle) - return (unsigned long) f; - return 0; + return idr_find_ext(&head->handle_idr, handle); } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { @@ -852,15 +827,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return 
err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); @@ -869,50 +840,25 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, err = fl_set_key(net, tb, &f->key, &mask->key); if (err) - goto errout; + return err; fl_mask_update_range(mask); fl_set_masked_key(&f->mkey, &f->key, mask); - tcf_exts_change(tp, &f->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; -} - -static u32 fl_grab_new_handle(struct tcf_proto *tp, - struct cls_fl_head *head) -{ - unsigned int i = 0x80000000; - u32 handle; - - do { - if (++head->hgen == 0x7FFFFFFF) - head->hgen = 1; - } while (--i > 0 && fl_get(tp, head->hgen)); - - if (unlikely(i == 0)) { - pr_err("Insufficient number of handles\n"); - handle = 0; - } else { - handle = head->hgen; - } - - return handle; } static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg; + struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct nlattr **tb; struct fl_flow_mask mask = {}; + unsigned long idr_index; int err; if (!tca[TCA_OPTIONS]) @@ -943,41 +889,49 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; if (!handle) { - handle = fl_grab_new_handle(tp, head); - if (!handle) { - err = -EINVAL; + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + 1, 0x80000000, GFP_KERNEL); + if (err) goto errout; - } + fnew->handle = idr_index; + } + + /* user specifies a handle and it doesn't exist */ + if (handle && !fold) { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (err) + goto errout; + fnew->handle = idr_index; } - fnew->handle = handle; if (tb[TCA_FLOWER_FLAGS]) { fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); if (!tc_flags_valid(fnew->flags)) { err = -EINVAL; - goto errout; + goto errout_idr; } } err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); if (err) - goto errout; + goto errout_idr; err = fl_check_assign_mask(head, &mask); if (err) - goto errout; + goto errout_idr; if (!tc_skip_sw(fnew->flags)) { if (!fold && fl_lookup(head, &fnew->mkey)) { err = -EEXIST; - goto errout; + goto errout_idr; } err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, head->ht_params); if (err) - goto errout; + goto errout_idr; } if (!tc_skip_hw(fnew->flags)) { @@ -986,7 +940,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, &mask.key, fnew); if (err) - goto errout; + goto errout_idr; } if (!tc_in_hw(fnew->flags)) @@ -1000,11 +954,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, fl_hw_destroy_filter(tp, fold); } - *arg = (unsigned long) fnew; + *arg = fnew; if (fold) { + fnew->handle = handle; + idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, fl_destroy_filter); } else { list_add_tail_rcu(&fnew->list, &head->filters); @@ -1013,6 +970,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(tb); return 0; +errout_idr: + if (fnew->handle) + idr_remove_ext(&head->handle_idr, fnew->handle); errout: tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -1021,10 +981,10 @@ errout_tb: return err; } 
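cls_bpf and cls_flower also retire their hand-rolled handle generators here (the deleted cls_bpf_grab_new_handle()/fl_grab_new_handle() loops, which probed up to 2^31 candidate values) in favour of an IDR keyed by handle: one idr_alloc_ext() call either picks the next free handle or reserves exactly the user-supplied one, and lookup becomes a plain idr_find_ext(). A minimal sketch of the bookkeeping, assuming a head structure that embeds the IDR the way cls_flower's does; my_handle_alloc() is a hypothetical helper:

#include <linux/idr.h>

struct my_head {
	struct idr handle_idr;		/* idr_init() at classifier init */
};

static int my_handle_alloc(struct my_head *head, void *filter,
			   u32 user_handle, u32 *handle)
{
	unsigned long index;
	int err;

	if (!user_handle)		/* kernel-chosen: next free slot */
		err = idr_alloc_ext(&head->handle_idr, filter, &index,
				    1, 0x80000000, GFP_KERNEL);
	else				/* user-chosen: reserve that slot */
		err = idr_alloc_ext(&head->handle_idr, filter, &index,
				    user_handle, user_handle + 1, GFP_KERNEL);
	if (err)			/* e.g. -ENOSPC if already taken */
		return err;

	*handle = index;
	return 0;
}

On later failure paths the reservation is dropped again with idr_remove_ext(), as the errout_idr labels above do, so an aborted change never leaks a handle.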
-static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int fl_delete(struct tcf_proto *tp, void *arg, bool *last) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f = (struct cls_fl_filter *) arg; + struct cls_fl_filter *f = arg; if (!tc_skip_sw(f->flags)) rhashtable_remove_fast(&head->ht, &f->ht_node, @@ -1042,7 +1002,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry_rcu(f, &head->filters, list) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } @@ -1177,11 +1137,11 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); } -static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f = (struct cls_fl_filter *) fh; + struct cls_fl_filter *f = fh; struct nlattr *nest; struct fl_flow_key *key, *mask; @@ -1383,6 +1343,14 @@ nla_put_failure: return -1; } +static void fl_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_fl_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_fl_ops __read_mostly = { .kind = "flower", .classify = fl_classify, @@ -1393,6 +1361,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .dump = fl_dump, + .bind_class = fl_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index d3885362e017..20f0de1a960a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -28,6 +28,7 @@ #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> +#include <net/sch_generic.h> #define HTSIZE 256 @@ -46,7 +47,10 @@ struct fw_filter { #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static u32 fw_hash(u32 handle) @@ -83,9 +87,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, } } } else { + struct Qdisc *q = tcf_block_q(tp->chain->block); + /* Old method: classify the packet using its skb mark. 
*/ if (id && (TC_H_MAJ(id) == 0 || - !(TC_H_MAJ(id ^ tp->q->handle)))) { + !(TC_H_MAJ(id ^ q->handle)))) { res->classid = id; res->class = 0; return 0; @@ -95,20 +101,20 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static unsigned long fw_get(struct tcf_proto *tp, u32 handle) +static void *fw_get(struct tcf_proto *tp, u32 handle) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; if (head == NULL) - return 0; + return NULL; f = rtnl_dereference(head->ht[fw_hash(handle)]); for (; f; f = rtnl_dereference(f->next)) { if (f->id == handle) - return (unsigned long)f; + return f; } - return 0; + return NULL; } static int fw_init(struct tcf_proto *tp) @@ -119,12 +125,28 @@ static int fw_init(struct tcf_proto *tp) return 0; } +static void __fw_delete_filter(struct fw_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + +static void fw_delete_filter_work(struct work_struct *work) +{ + struct fw_filter *f = container_of(work, struct fw_filter, work); + + rtnl_lock(); + __fw_delete_filter(f); + rtnl_unlock(); +} + static void fw_delete_filter(struct rcu_head *head) { struct fw_filter *f = container_of(head, struct fw_filter, rcu); - tcf_exts_destroy(&f->exts); - kfree(f); + INIT_WORK(&f->work, fw_delete_filter_work); + tcf_queue_work(&f->work); } static void fw_destroy(struct tcf_proto *tp) @@ -141,16 +163,19 @@ static void fw_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(head->ht[h], rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fw_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fw_delete_filter); + else + __fw_delete_filter(f); } } kfree_rcu(head, rcu); } -static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *)arg; + struct fw_filter *f = arg; struct fw_filter __rcu **fp; struct fw_filter *pfp; int ret = -EINVAL; @@ -166,6 +191,7 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last) if (pfp == f) { RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); ret = 0; break; @@ -190,22 +216,17 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { [TCA_FW_MASK] = { .type = NLA_U32 }, }; -static int -fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base, - bool ovr) +static int fw_set_parms(struct net *net, struct tcf_proto *tp, + struct fw_filter *f, struct nlattr **tb, + struct nlattr **tca, unsigned long base, bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); - struct tcf_exts e; u32 mask; int err; - err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) - goto errout; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); @@ -216,10 +237,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (tb[TCA_FW_INDEV]) { int ret; ret = tcf_change_indev(net, tb[TCA_FW_INDEV]); - if (ret < 0) { - err = ret; - goto errout; - } + if (ret < 0) + return ret; f->ifindex = ret; } #endif /* CONFIG_NET_CLS_IND */ @@ -228,25 +247,20 @@ 
fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) - goto errout; + return err; } else if (head->mask != 0xFFFFFFFF) - goto errout; - - tcf_exts_change(tp, &f->exts, &e); + return err; return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, struct nlattr **tca, unsigned long *arg, + u32 handle, struct nlattr **tca, void **arg, bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *) *arg; + struct fw_filter *f = *arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FW_MAX + 1]; int err; @@ -282,7 +296,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return err; } - err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr); + err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -298,9 +312,10 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); rcu_assign_pointer(*fp, fnew); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); - *arg = (unsigned long)fnew; + *arg = fnew; return err; } @@ -330,14 +345,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, f->id = handle; f->tp = tp; - err = fw_change_attrs(net, tp, f, tb, tca, base, ovr); + err = fw_set_parms(net, tp, f, tb, tca, base, ovr); if (err < 0) goto errout; RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]); rcu_assign_pointer(head->ht[fw_hash(handle)], f); - *arg = (unsigned long)f; + *arg = f; return 0; errout: @@ -366,7 +381,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -375,11 +390,11 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *)fh; + struct fw_filter *f = fh; struct nlattr *nest; if (f == NULL) @@ -387,7 +402,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, t->tcm_handle = f->id; - if (!f->res.classid && !tcf_exts_is_available(&f->exts)) + if (!f->res.classid && !tcf_exts_has_actions(&f->exts)) return skb->len; nest = nla_nest_start(skb, TCA_OPTIONS); @@ -424,6 +439,14 @@ nla_put_failure: return -1; } +static void fw_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct fw_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_fw_ops __read_mostly = { .kind = "fw", .classify = fw_classify, @@ -434,6 +457,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = { .delete = fw_delete, .walk = fw_walk, .dump = fw_dump, + .bind_class = fw_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 9dc26c32cf32..66d4e0099158 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -21,7 +21,10 @@ struct cls_mall_head { struct tcf_result res; u32 handle; u32 flags; - struct rcu_head rcu; + union { + struct 
work_struct work; + struct rcu_head rcu; + }; }; static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -32,6 +35,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (tc_skip_sw(head->flags)) return -1; + *res = head->res; return tcf_exts_exec(skb, &head->exts, res); } @@ -40,74 +44,93 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void __mall_destroy(struct cls_mall_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_exts_put_net(&head->exts); + kfree(head); +} + +static void mall_destroy_work(struct work_struct *work) +{ + struct cls_mall_head *head = container_of(work, struct cls_mall_head, + work); + rtnl_lock(); + __mall_destroy(head); + rtnl_unlock(); +} + static void mall_destroy_rcu(struct rcu_head *rcu) { struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, rcu); - tcf_exts_destroy(&head->exts); - kfree(head); + INIT_WORK(&head->work, mall_destroy_work); + tcf_queue_work(&head->work); +} + +static void mall_destroy_hw_filter(struct tcf_proto *tp, + struct cls_mall_head *head, + unsigned long cookie) +{ + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + + tc_cls_common_offload_init(&cls_mall.common, tp); + cls_mall.command = TC_CLSMATCHALL_DESTROY; + cls_mall.cookie = cookie; + + tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false); } static int mall_replace_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, unsigned long cookie) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_to_netdev offload; - struct tc_cls_matchall_offload mall_offload = {0}; + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + bool skip_sw = tc_skip_sw(head->flags); int err; - offload.type = TC_SETUP_MATCHALL; - offload.cls_mall = &mall_offload; - offload.cls_mall->command = TC_CLSMATCHALL_REPLACE; - offload.cls_mall->exts = &head->exts; - offload.cls_mall->cookie = cookie; + tc_cls_common_offload_init(&cls_mall.common, tp); + cls_mall.command = TC_CLSMATCHALL_REPLACE; + cls_mall.exts = &head->exts; + cls_mall.cookie = cookie; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, - tp->protocol, &offload); - if (!err) + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, + &cls_mall, skip_sw); + if (err < 0) { + mall_destroy_hw_filter(tp, head, cookie); + return err; + } else if (err > 0) { head->flags |= TCA_CLS_FLAGS_IN_HW; + } - return err; -} + if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; -static void mall_destroy_hw_filter(struct tcf_proto *tp, - struct cls_mall_head *head, - unsigned long cookie) -{ - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_to_netdev offload; - struct tc_cls_matchall_offload mall_offload = {0}; - - offload.type = TC_SETUP_MATCHALL; - offload.cls_mall = &mall_offload; - offload.cls_mall->command = TC_CLSMATCHALL_DESTROY; - offload.cls_mall->exts = NULL; - offload.cls_mall->cookie = cookie; - - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, - tp->protocol, &offload); + return 0; } static void mall_destroy(struct tcf_proto *tp) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct net_device *dev = tp->q->dev_queue->dev; if (!head) return; - if (tc_should_offload(dev, tp, head->flags)) + if (!tc_skip_hw(head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); - call_rcu(&head->rcu, mall_destroy_rcu); + if (tcf_exts_get_net(&head->exts)) + 
call_rcu(&head->rcu, mall_destroy_rcu); + else + __mall_destroy(head); } -static unsigned long mall_get(struct tcf_proto *tp, u32 handle) +static void *mall_get(struct tcf_proto *tp, u32 handle) { - return 0UL; + return NULL; } static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { @@ -120,36 +143,25 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0); - if (err) - return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr); if (err < 0) - goto errout; + return err; if (tb[TCA_MATCHALL_CLASSID]) { head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); tcf_bind_filter(tp, &head->res, base); } - - tcf_exts_change(tp, &head->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct net_device *dev = tp->q->dev_queue->dev; struct nlattr *tb[TCA_MATCHALL_MAX + 1]; struct cls_mall_head *new; u32 flags = 0; @@ -189,20 +201,16 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (err) goto err_set_parms; - if (tc_should_offload(dev, tp, flags)) { + if (!tc_skip_hw(new->flags)) { err = mall_replace_hw_filter(tp, new, (unsigned long) new); - if (err) { - if (tc_skip_sw(flags)) - goto err_replace_hw_filter; - else - err = 0; - } + if (err) + goto err_replace_hw_filter; } if (!tc_in_hw(new->flags)) new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; - *arg = (unsigned long) head; + *arg = head; rcu_assign_pointer(tp->root, new); return 0; @@ -214,7 +222,7 @@ err_exts_init: return err; } -static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int mall_delete(struct tcf_proto *tp, void *arg, bool *last) { return -EOPNOTSUPP; } @@ -225,16 +233,16 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) head, arg) < 0) + if (arg->fn(tp, head, arg) < 0) arg->stop = 1; skip: arg->count++; } -static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct cls_mall_head *head = (struct cls_mall_head *) fh; + struct cls_mall_head *head = fh; struct nlattr *nest; if (!head) @@ -268,6 +276,14 @@ nla_put_failure: return -1; } +static void mall_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_mall_head *head = fh; + + if (head && head->res.classid == classid) + head->res.class = cl; +} + static struct tcf_proto_ops cls_mall_ops __read_mostly = { .kind = "matchall", .classify = mall_classify, @@ -278,6 +294,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = { .delete = mall_delete, .walk = mall_walk, .dump = mall_dump, + .bind_class = mall_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index d63d5502ee02..ac9a5b8825b9 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -57,7 +57,10 @@ struct route4_filter { u32 handle; struct route4_bucket *bkt; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; 
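Independent of the teardown rework, every classifier in this series gains a .bind_class hook (cls_bpf_bind_class, fl_bind_class, fw_bind_class and mall_bind_class above; route4, rsvp and tcindex variants below). A filter caches the resolved class pointer in its tcf_result, and this hook lets the qdisc layer walk all filters and repoint any result bound to a given classid when that class changes, rather than leaving a stale pointer behind. A sketch of the expected caller side, with abbreviated names; the actual driver of the hook lives in the class-management code elsewhere in this series, not in the hunks shown here:

struct tcf_bind_args {
	struct tcf_walker w;		/* passed to tp->ops->walk() */
	u32 classid;
	unsigned long cl;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	/* invoked once per filter by the walk; re-bind any result that
	 * currently points at the class being updated
	 */
	if (tp->ops->bind_class)
		tp->ops->bind_class(n, a->classid, a->cl);
	return 0;
}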
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) @@ -113,7 +116,7 @@ static inline int route4_hash_wild(void) #define ROUTE4_APPLY_RESULT() \ { \ *res = f->res; \ - if (tcf_exts_is_available(&f->exts)) { \ + if (tcf_exts_has_actions(&f->exts)) { \ int r = tcf_exts_exec(skb, &f->exts, res); \ if (r < 0) { \ dont_cache = 1; \ @@ -216,7 +219,7 @@ static inline u32 from_hash(u32 id) return 16 + (id & 0xF); } -static unsigned long route4_get(struct tcf_proto *tp, u32 handle) +static void *route4_get(struct tcf_proto *tp, u32 handle) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_bucket *b; @@ -225,11 +228,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) h1 = to_hash(handle); if (h1 > 256) - return 0; + return NULL; h2 = from_hash(handle >> 16); if (h2 > 32) - return 0; + return NULL; b = rtnl_dereference(head->table[h1]); if (b) { @@ -237,9 +240,9 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) f; f = rtnl_dereference(f->next)) if (f->handle == handle) - return (unsigned long)f; + return f; } - return 0; + return NULL; } static int route4_init(struct tcf_proto *tp) @@ -254,12 +257,28 @@ static int route4_init(struct tcf_proto *tp) return 0; } +static void __route4_delete_filter(struct route4_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + +static void route4_delete_filter_work(struct work_struct *work) +{ + struct route4_filter *f = container_of(work, struct route4_filter, work); + + rtnl_lock(); + __route4_delete_filter(f); + rtnl_unlock(); +} + static void route4_delete_filter(struct rcu_head *head) { struct route4_filter *f = container_of(head, struct route4_filter, rcu); - tcf_exts_destroy(&f->exts); - kfree(f); + INIT_WORK(&f->work, route4_delete_filter_work); + tcf_queue_work(&f->work); } static void route4_destroy(struct tcf_proto *tp) @@ -284,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp) next = rtnl_dereference(f->next); RCU_INIT_POINTER(b->ht[h2], next); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, route4_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, route4_delete_filter); + else + __route4_delete_filter(f); } } RCU_INIT_POINTER(head->table[h1], NULL); @@ -294,10 +316,10 @@ static void route4_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) { struct route4_head *head = rtnl_dereference(tp->root); - struct route4_filter *f = (struct route4_filter *)arg; + struct route4_filter *f = arg; struct route4_filter __rcu **fp; struct route4_filter *nf; struct route4_bucket *b; @@ -325,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last) /* Delete it */ tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, route4_delete_filter); /* Strip RTNL protected tree */ @@ -372,37 +395,32 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_filter *fp; unsigned int h1; struct route4_bucket *b; - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = -EINVAL; if (tb[TCA_ROUTE4_TO]) { if (new && handle & 0x8000) - goto errout; + return -EINVAL; to = nla_get_u32(tb[TCA_ROUTE4_TO]); if (to > 0xFF) 
- goto errout; + return -EINVAL; nhandle = to; } if (tb[TCA_ROUTE4_FROM]) { if (tb[TCA_ROUTE4_IIF]) - goto errout; + return -EINVAL; id = nla_get_u32(tb[TCA_ROUTE4_FROM]); if (id > 0xFF) - goto errout; + return -EINVAL; nhandle |= id << 16; } else if (tb[TCA_ROUTE4_IIF]) { id = nla_get_u32(tb[TCA_ROUTE4_IIF]); if (id > 0x7FFF) - goto errout; + return -EINVAL; nhandle |= (id | 0x8000) << 16; } else nhandle |= 0xFFFF << 16; @@ -410,27 +428,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, if (handle && new) { nhandle |= handle & 0x7F00; if (nhandle != handle) - goto errout; + return -EINVAL; } h1 = to_hash(nhandle); b = rtnl_dereference(head->table[h1]); if (!b) { - err = -ENOBUFS; b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); if (b == NULL) - goto errout; + return -ENOBUFS; rcu_assign_pointer(head->table[h1], b); } else { unsigned int h2 = from_hash(nhandle >> 16); - err = -EEXIST; for (fp = rtnl_dereference(b->ht[h2]); fp; fp = rtnl_dereference(fp->next)) if (fp->handle == f->handle) - goto errout; + return -EEXIST; } if (tb[TCA_ROUTE4_TO]) @@ -450,17 +466,12 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &f->res, base); } - tcf_exts_change(tp, &f->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -479,7 +490,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - fold = (struct route4_filter *)*arg; + fold = *arg; if (fold && handle && fold->handle != handle) return -EINVAL; @@ -537,9 +548,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, } route4_reset_fastmap(head); - *arg = (unsigned long)f; + *arg = f; if (fold) { tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, route4_delete_filter); } return 0; @@ -576,7 +588,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -587,10 +599,10 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct route4_filter *f = (struct route4_filter *)fh; + struct route4_filter *f = fh; struct nlattr *nest; u32 id; @@ -636,6 +648,14 @@ nla_put_failure: return -1; } +static void route4_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct route4_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_route4_ops __read_mostly = { .kind = "route", .classify = route4_classify, @@ -646,6 +666,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = { .delete = route4_delete, .walk = route4_walk, .dump = route4_dump, + .bind_class = route4_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 0d9d07798699..cf325625c99d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -97,7 +97,10 @@ struct rsvp_filter { u32 handle; struct rsvp_session *sess; - struct 
rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) @@ -248,7 +251,7 @@ static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h) BUG_ON(1); } -static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) +static void *rsvp_get(struct tcf_proto *tp, u32 handle) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_session *s; @@ -257,17 +260,17 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) unsigned int h2 = (handle >> 8) & 0xFF; if (h2 > 16) - return 0; + return NULL; for (s = rtnl_dereference(head->ht[h1]); s; s = rtnl_dereference(s->next)) { for (f = rtnl_dereference(s->ht[h2]); f; f = rtnl_dereference(f->next)) { if (f->handle == handle) - return (unsigned long)f; + return f; } } - return 0; + return NULL; } static int rsvp_init(struct tcf_proto *tp) @@ -282,12 +285,28 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } +static void __rsvp_delete_filter(struct rsvp_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + +static void rsvp_delete_filter_work(struct work_struct *work) +{ + struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); + + rtnl_lock(); + __rsvp_delete_filter(f); + rtnl_unlock(); +} + static void rsvp_delete_filter_rcu(struct rcu_head *head) { struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); - tcf_exts_destroy(&f->exts); - kfree(f); + INIT_WORK(&f->work, rsvp_delete_filter_work); + tcf_queue_work(&f->work); } static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) @@ -297,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - call_rcu(&f->rcu, rsvp_delete_filter_rcu); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, rsvp_delete_filter_rcu); + else + __rsvp_delete_filter(f); } static void rsvp_destroy(struct tcf_proto *tp) @@ -328,10 +350,10 @@ static void rsvp_destroy(struct tcf_proto *tp) kfree_rcu(data, rcu); } -static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last) { struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg; + struct rsvp_filter *nfp, *f = arg; struct rsvp_filter __rcu **fp; unsigned int h = f->handle; struct rsvp_session __rcu **sp; @@ -389,7 +411,7 @@ static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) if ((data->hgenerator += 0x10000) == 0) data->hgenerator = 0x10000; h = data->hgenerator|salt; - if (rsvp_get(tp, h) == 0) + if (!rsvp_get(tp, h)) return h; } return 0; @@ -464,7 +486,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -493,7 +515,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout2; - f = (struct rsvp_filter *)*arg; + f = *arg; if (f) { /* Node exists: adjust only classid */ struct rsvp_filter *n; @@ -518,7 +540,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, tcf_bind_filter(tp, &n->res, base); } - tcf_exts_change(tp, &n->exts, &e); + tcf_exts_change(&n->exts, &e); rsvp_replace(tp, n, 
handle); return 0; } @@ -591,7 +613,7 @@ insert: if (f->tunnelhdr == 0) tcf_bind_filter(tp, &f->res, base); - tcf_exts_change(tp, &f->exts, &e); + tcf_exts_change(&f->exts, &e); fp = &s->ht[h2]; for (nfp = rtnl_dereference(*fp); nfp; @@ -604,7 +626,7 @@ insert: RCU_INIT_POINTER(f->next, nfp); rcu_assign_pointer(*fp, f); - *arg = (unsigned long)f; + *arg = f; return 0; } } @@ -663,7 +685,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -674,10 +696,10 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct rsvp_filter *f = (struct rsvp_filter *)fh; + struct rsvp_filter *f = fh; struct rsvp_session *s; struct nlattr *nest; struct tc_rsvp_pinfo pinfo; @@ -723,6 +745,14 @@ nla_put_failure: return -1; } +static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct rsvp_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, .classify = rsvp_classify, @@ -733,6 +763,7 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = { .delete = rsvp_delete, .walk = rsvp_walk, .dump = rsvp_dump, + .bind_class = rsvp_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 8a8a58357c39..67467ae24c97 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -13,6 +13,7 @@ #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> +#include <net/sch_generic.h> /* * Passing parameters to the root seems to be done more awkwardly than really @@ -27,14 +28,20 @@ struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct tcindex_filter { u16 key; struct tcindex_filter_result result; struct tcindex_filter __rcu *next; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; @@ -52,7 +59,7 @@ struct tcindex_data { static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { - return tcf_exts_is_predicative(&r->exts) || r->res.classid; + return tcf_exts_has_actions(&r->exts) || r->res.classid; } static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, @@ -90,9 +97,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, f = tcindex_lookup(p, key); if (!f) { + struct Qdisc *q = tcf_block_q(tp->chain->block); + if (!p->fall_through) return -1; - res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key); + res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key); res->class = 0; pr_debug("alg 0x%x\n", res->classid); return 0; @@ -104,16 +113,16 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, } -static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) +static void *tcindex_get(struct tcf_proto *tp, u32 handle) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); if (p->perfect && handle >= p->alloc_hash) - return 0; + return NULL; r = tcindex_lookup(p, handle); - return r && tcindex_filter_is_set(r) ? 
(unsigned long) r : 0UL; + return r && tcindex_filter_is_set(r) ? r : NULL; } static int tcindex_init(struct tcf_proto *tp) @@ -133,12 +142,46 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) +{ + tcf_exts_destroy(&r->exts); + tcf_exts_put_net(&r->exts); +} + +static void tcindex_destroy_rexts_work(struct work_struct *work) +{ + struct tcindex_filter_result *r; + + r = container_of(work, struct tcindex_filter_result, work); + rtnl_lock(); + __tcindex_destroy_rexts(r); + rtnl_unlock(); +} + static void tcindex_destroy_rexts(struct rcu_head *head) { struct tcindex_filter_result *r; r = container_of(head, struct tcindex_filter_result, rcu); - tcf_exts_destroy(&r->exts); + INIT_WORK(&r->work, tcindex_destroy_rexts_work); + tcf_queue_work(&r->work); +} + +static void __tcindex_destroy_fexts(struct tcindex_filter *f) +{ + tcf_exts_destroy(&f->result.exts); + tcf_exts_put_net(&f->result.exts); + kfree(f); +} + +static void tcindex_destroy_fexts_work(struct work_struct *work) +{ + struct tcindex_filter *f = container_of(work, struct tcindex_filter, + work); + + rtnl_lock(); + __tcindex_destroy_fexts(f); + rtnl_unlock(); } static void tcindex_destroy_fexts(struct rcu_head *head) @@ -146,18 +189,18 @@ static void tcindex_destroy_fexts(struct rcu_head *head) struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); - tcf_exts_destroy(&f->result.exts); - kfree(f); + INIT_WORK(&f->work, tcindex_destroy_fexts_work); + tcf_queue_work(&f->work); } -static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) { struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; + struct tcindex_filter_result *r = arg; struct tcindex_filter __rcu **walk; struct tcindex_filter *f = NULL; - pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p); + pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p); if (p->perfect) { if (!r->res.class) return -ENOENT; @@ -182,18 +225,24 @@ found: * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - if (f) - call_rcu(&f->rcu, tcindex_destroy_fexts); - else - call_rcu(&r->rcu, tcindex_destroy_rexts); + if (f) { + if (tcf_exts_get_net(&f->result.exts)) + call_rcu(&f->rcu, tcindex_destroy_fexts); + else + __tcindex_destroy_fexts(f); + } else { + if (tcf_exts_get_net(&r->exts)) + call_rcu(&r->rcu, tcindex_destroy_rexts); + else + __tcindex_destroy_rexts(r); + } *last = false; return 0; } static int tcindex_destroy_element(struct tcf_proto *tp, - unsigned long arg, - struct tcf_walker *walker) + void *arg, struct tcf_walker *walker) { bool last; @@ -419,9 +468,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (old_r) - tcf_exts_change(tp, &r->exts, &e); + tcf_exts_change(&r->exts, &e); else - tcf_exts_change(tp, &cr.exts, &e); + tcf_exts_change(&cr.exts, &e); if (old_r && old_r != r) { err = tcindex_filter_result_init(old_r); @@ -439,7 +488,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; - tcf_exts_change(tp, &f->result.exts, &r->exts); + tcf_exts_change(&f->result.exts, &r->exts); fp = cp->h + (handle % cp->hash); for (nfp = rtnl_dereference(*fp); @@ -471,17 +520,17 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct 
tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; + struct tcindex_filter_result *r = *arg; int err; pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," - "p %p,r %p,*arg 0x%lx\n", - tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L); + "p %p,r %p,*arg %p\n", + tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL); if (!opt) return 0; @@ -506,9 +555,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) if (!p->perfect[i].res.class) continue; if (walker->count >= walker->skip) { - if (walker->fn(tp, - (unsigned long) (p->perfect+i), walker) - < 0) { + if (walker->fn(tp, p->perfect + i, walker) < 0) { walker->stop = 1; return; } @@ -522,8 +569,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) for (f = rtnl_dereference(p->h[i]); f; f = next) { next = rtnl_dereference(f->next); if (walker->count >= walker->skip) { - if (walker->fn(tp, (unsigned long) &f->result, - walker) < 0) { + if (walker->fn(tp, &f->result, walker) < 0) { walker->stop = 1; return; } @@ -548,14 +594,14 @@ static void tcindex_destroy(struct tcf_proto *tp) } -static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; + struct tcindex_filter_result *r = fh; struct nlattr *nest; - pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n", + pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n", tp, fh, skb, t, p, r); pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h); @@ -610,6 +656,14 @@ nla_put_failure: return -1; } +static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct tcindex_filter_result *r = fh; + + if (r && r->res.classid == classid) + r->res.class = cl; +} + static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .kind = "tcindex", .classify = tcindex_classify, @@ -620,6 +674,7 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .delete = tcindex_delete, .walk = tcindex_walk, .dump = tcindex_dump, + .bind_class = tcindex_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 2d01195153e6..ac152b4f4247 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -40,10 +40,13 @@ #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/bitmap.h> +#include <linux/netdevice.h> +#include <linux/hash.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <linux/netdevice.h> +#include <linux/idr.h> struct tc_u_knode { struct tc_u_knode __rcu *next; @@ -66,7 +69,10 @@ struct tc_u_knode { u32 __percpu *pcpu_success; #endif struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. 
*/ @@ -80,6 +86,7 @@ struct tc_u_hnode { struct tc_u_common *tp_c; int refcnt; unsigned int divisor; + struct idr handle_idr; struct rcu_head rcu; /* The 'ht' field MUST be the last field in structure to allow for * more entries allocated at end of structure. @@ -89,9 +96,10 @@ struct tc_u_common { struct tc_u_hnode __rcu *hlist; - struct Qdisc *q; + struct tcf_block *block; int refcnt; - u32 hgenerator; + struct idr handle_idr; + struct hlist_node hnode; struct rcu_head rcu; }; @@ -289,7 +297,7 @@ out: } -static unsigned long u32_get(struct tcf_proto *tp, u32 handle) +static void *u32_get(struct tcf_proto *tp, u32 handle) { struct tc_u_hnode *ht; struct tc_u_common *tp_c = tp->data; @@ -300,43 +308,68 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle) ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); if (!ht) - return 0; + return NULL; if (TC_U32_KEY(handle) == 0) - return (unsigned long)ht; + return ht; - return (unsigned long)u32_lookup_key(ht, handle); + return u32_lookup_key(ht, handle); } -static u32 gen_new_htid(struct tc_u_common *tp_c) +static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) { - int i = 0x800; + unsigned long idr_index; + int err; - /* hgenerator only used inside rtnl lock it is safe to increment + /* This is only used inside the rtnl lock, so it is safe to increment + * without read-copy-update (RCU) semantics */ - do { - if (++tp_c->hgenerator == 0x7FF) - tp_c->hgenerator = 1; - } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); + err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index, + 1, 0x7FF, GFP_KERNEL); + if (err) + return 0; + return (u32)(idr_index | 0x800) << 20; +} + +static struct hlist_head *tc_u_common_hash; + +#define U32_HASH_SHIFT 10 +#define U32_HASH_SIZE (1 << U32_HASH_SHIFT) - return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; +static unsigned int tc_u_hash(const struct tcf_proto *tp) +{ + return hash_ptr(tp->chain->block, U32_HASH_SHIFT); +} + +static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) +{ + struct tc_u_common *tc; + unsigned int h; + + h = tc_u_hash(tp); + hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { + if (tc->block == tp->chain->block) + return tc; + } + return NULL; } static int u32_init(struct tcf_proto *tp) { struct tc_u_hnode *root_ht; struct tc_u_common *tp_c; + unsigned int h; - tp_c = tp->q->u32_node; + tp_c = tc_u_common_find(tp); root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) return -ENOBUFS; root_ht->refcnt++; - root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; + root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; + idr_init(&root_ht->handle_idr); if (tp_c == NULL) { tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); @@ -344,8 +377,12 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } - tp_c->q = tp->q; - tp->q->u32_node = tp_c; + tp_c->block = tp->chain->block; + INIT_HLIST_NODE(&tp_c->hnode); + idr_init(&tp_c->handle_idr); + + h = tc_u_hash(tp); + hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); } tp_c->refcnt++; @@ -362,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); + tcf_exts_put_net(&n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF @@ -384,11 +422,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, * this the u32_delete_key_rcu variant does not free the percpu * statistics.
*/ +static void u32_delete_key_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, false); + rtnl_unlock(); +} + static void u32_delete_key_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, false); + INIT_WORK(&key->work, u32_delete_key_work); + tcf_queue_work(&key->work); } /* u32_delete_key_freepf_rcu is the rcu callback variant @@ -398,11 +446,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu) * for the variant that should be used with keys return from * u32_init_knode() */ +static void u32_delete_key_freepf_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, true); + rtnl_unlock(); +} + static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, true); + INIT_WORK(&key->work, u32_delete_key_freepf_work); + tcf_queue_work(&key->work); } static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) @@ -419,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) RCU_INIT_POINTER(*kp, key->next); tcf_unbind_filter(tp, &key->res); + tcf_exts_get_net(&key->exts); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } @@ -428,111 +487,95 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } -static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; - - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - if (tc_should_offload(dev, tp, 0)) { - offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; - offload.cls_u32->knode.handle = handle; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); - } + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_DELETE_HNODE; + cls_u32.hnode.divisor = h->divisor; + cls_u32.hnode.handle = h->handle; + cls_u32.hnode.prio = h->prio; + + tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + bool skip_sw = tc_skip_sw(flags); + bool offloaded = false; int err; - if (!tc_should_offload(dev, tp, flags)) - return tc_skip_sw(flags) ? 
-EINVAL : 0; - - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - offload.cls_u32->command = TC_CLSU32_NEW_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_NEW_HNODE; + cls_u32.hnode.divisor = h->divisor; + cls_u32.hnode.handle = h->handle; + cls_u32.hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); - if (tc_skip_sw(flags)) + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); + if (err < 0) { + u32_clear_hw_hnode(tp, h); return err; + } else if (err > 0) { + offloaded = true; + } + + if (skip_sw && !offloaded) + return -EINVAL; return 0; } -static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; - - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - if (tc_should_offload(dev, tp, 0)) { - offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; - - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); - } + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_DELETE_KNODE; + cls_u32.knode.handle = handle; + + tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + bool skip_sw = tc_skip_sw(flags); int err; - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - if (!tc_should_offload(dev, tp, flags)) - return tc_skip_sw(flags) ? 
-EINVAL : 0; - - offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; - offload.cls_u32->knode.handle = n->handle; - offload.cls_u32->knode.fshift = n->fshift; + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_REPLACE_KNODE; + cls_u32.knode.handle = n->handle; + cls_u32.knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK - offload.cls_u32->knode.val = n->val; - offload.cls_u32->knode.mask = n->mask; + cls_u32.knode.val = n->val; + cls_u32.knode.mask = n->mask; #else - offload.cls_u32->knode.val = 0; - offload.cls_u32->knode.mask = 0; + cls_u32.knode.val = 0; + cls_u32.knode.mask = 0; #endif - offload.cls_u32->knode.sel = &n->sel; - offload.cls_u32->knode.exts = &n->exts; + cls_u32.knode.sel = &n->sel; + cls_u32.knode.exts = &n->exts; if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); + cls_u32.knode.link_handle = n->ht_down->handle; - if (!err) + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); + if (err < 0) { + u32_remove_hw_knode(tp, n->handle); + return err; + } else if (err > 0) { n->flags |= TCA_CLS_FLAGS_IN_HW; + } - if (tc_skip_sw(flags)) - return err; + if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; return 0; } @@ -548,7 +591,11 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); - call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + idr_remove_ext(&ht->handle_idr, n->handle); + if (tcf_exts_get_net(&n->exts)) + call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + else + u32_destroy_key(n->tp, n, true); } } } @@ -569,6 +616,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { u32_clear_hw_hnode(tp, ht); + idr_destroy(&ht->handle_idr); + idr_remove_ext(&tp_c->handle_idr, ht->handle); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; @@ -602,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp) if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; - tp->q->u32_node = NULL; + hlist_del(&tp_c->hnode); for (ht = rtnl_dereference(tp_c->hlist); ht; @@ -616,15 +665,16 @@ static void u32_destroy(struct tcf_proto *tp) kfree_rcu(ht, rcu); } + idr_destroy(&tp_c->handle_idr); kfree(tp_c); } tp->data = NULL; } -static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int u32_delete(struct tcf_proto *tp, void *arg, bool *last) { - struct tc_u_hnode *ht = (struct tc_u_hnode *)arg; + struct tc_u_hnode *ht = arg; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); struct tc_u_common *tp_c = tp->data; int ret = 0; @@ -684,27 +734,21 @@ ret: return ret; } -#define NR_U32_NODE (1<<12) -static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) +static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) { - struct tc_u_knode *n; - unsigned long i; - unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), - GFP_KERNEL); - if (!bitmap) - return handle | 0xFFF; - - for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]); - n; - n = rtnl_dereference(n->next)) - set_bit(TC_U32_NODE(n->handle), bitmap); - - i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800); - if (i >= NR_U32_NODE) - i = find_next_zero_bit(bitmap, NR_U32_NODE, 1); + unsigned long idr_index; + u32 start = htid | 0x800; + u32 max = htid | 0xFFF; + u32 min = htid; + + if 
(idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, + start, max + 1, GFP_KERNEL)) { + if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, + min + 1, max + 1, GFP_KERNEL)) + return max; + } - kfree(bitmap); - return handle | (i >= NR_U32_NODE ? 0xFFF : i); + return (u32)idr_index; } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { @@ -723,29 +767,24 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = -EINVAL; if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; if (TC_U32_KEY(handle)) - goto errout; + return -EINVAL; if (handle) { ht_down = u32_lookup_ht(ht->tp_c, handle); if (ht_down == NULL) - goto errout; + return -EINVAL; ht_down->refcnt++; } @@ -765,16 +804,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, int ret; ret = tcf_change_indev(net, tb[TCA_U32_INDEV]); if (ret < 0) - goto errout; + return -EINVAL; n->ifindex = ret; } #endif - tcf_exts_change(tp, &n->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, @@ -799,6 +833,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, if (pins->handle == n->handle) break; + idr_replace_ext(&ht->handle_idr, n, n->handle); RCU_INIT_POINTER(n->next, pins->next); rcu_assign_pointer(*ins, n); } @@ -858,7 +893,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -885,7 +920,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - n = (struct tc_u_knode *)*arg; + n = *arg; if (n) { struct tc_u_knode *new; @@ -919,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); + tcf_exts_get_net(&n->exts); call_rcu(&n->rcu, u32_delete_key_rcu); return 0; } @@ -930,29 +966,40 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; if (TC_U32_KEY(handle)) return -EINVAL; - if (handle == 0) { - handle = gen_new_htid(tp->data); - if (handle == 0) - return -ENOMEM; - } ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; + if (handle == 0) { + handle = gen_new_htid(tp->data, ht); + if (handle == 0) { + kfree(ht); + return -ENOMEM; + } + } else { + err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL, + handle, handle + 1, GFP_KERNEL); + if (err) { + kfree(ht); + return err; + } + } ht->tp_c = tp_c; ht->refcnt = 1; ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + idr_init(&ht->handle_idr); err = u32_replace_hw_hnode(tp, ht, flags); if (err) { + idr_remove_ext(&tp_c->handle_idr, handle); kfree(ht); return err; } RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); - *arg = (unsigned long)ht; + *arg = ht; return 0; } @@ -979,24 +1026,33 @@ static int u32_change(struct net *net, struct sk_buff 
*in_skb, if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) return -EINVAL; handle = htid | TC_U32_NODE(handle); + err = idr_alloc_ext(&ht->handle_idr, NULL, NULL, + handle, handle + 1, + GFP_KERNEL); + if (err) + return err; } else handle = gen_new_kid(ht, htid); - if (tb[TCA_U32_SEL] == NULL) - return -EINVAL; + if (tb[TCA_U32_SEL] == NULL) { + err = -EINVAL; + goto erridr; + } s = nla_data(tb[TCA_U32_SEL]); n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); - if (n == NULL) - return -ENOBUFS; + if (n == NULL) { + err = -ENOBUFS; + goto erridr; + } #ifdef CONFIG_CLS_U32_PERF size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); if (!n->pf) { - kfree(n); - return -ENOBUFS; + err = -ENOBUFS; + goto errfree; } #endif @@ -1047,7 +1103,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); - *arg = (unsigned long)n; + *arg = n; return 0; } @@ -1059,9 +1115,12 @@ errhw: errout: tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF +errfree: free_percpu(n->pf); #endif kfree(n); +erridr: + idr_remove_ext(&ht->handle_idr, handle); return err; } @@ -1081,7 +1140,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (ht->prio != tp->prio) continue; if (arg->count >= arg->skip) { - if (arg->fn(tp, (unsigned long)ht, arg) < 0) { + if (arg->fn(tp, ht, arg) < 0) { arg->stop = 1; return; } @@ -1095,7 +1154,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)n, arg) < 0) { + if (arg->fn(tp, n, arg) < 0) { arg->stop = 1; return; } @@ -1105,10 +1164,18 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static void u32_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct tc_u_knode *n = fh; + + if (n && n->res.classid == classid) + n->res.class = cl; +} + +static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct tc_u_knode *n = (struct tc_u_knode *)fh; + struct tc_u_knode *n = fh; struct tc_u_hnode *ht_up, *ht_down; struct nlattr *nest; @@ -1122,7 +1189,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; if (TC_U32_KEY(n->handle) == 0) { - struct tc_u_hnode *ht = (struct tc_u_hnode *)fh; + struct tc_u_hnode *ht = fh; u32 divisor = ht->divisor + 1; if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor)) @@ -1235,11 +1302,14 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { .delete = u32_delete, .walk = u32_walk, .dump = u32_dump, + .bind_class = u32_bind_class, .owner = THIS_MODULE, }; static int __init init_u32(void) { + int i, ret; + pr_info("u32 classifier\n"); #ifdef CONFIG_CLS_U32_PERF pr_info(" Performance counters on\n"); @@ -1250,12 +1320,25 @@ static int __init init_u32(void) #ifdef CONFIG_NET_CLS_ACT pr_info(" Actions configured\n"); #endif - return register_tcf_proto_ops(&cls_u32_ops); + tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!tc_u_common_hash) + return -ENOMEM; + + for (i = 0; i < U32_HASH_SIZE; i++) + INIT_HLIST_HEAD(&tc_u_common_hash[i]); + + ret = register_tcf_proto_ops(&cls_u32_ops); + if (ret) + kvfree(tc_u_common_hash); + return ret; } static void __exit exit_u32(void) { unregister_tcf_proto_ops(&cls_u32_ops); + kvfree(tc_u_common_hash); } 
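/* A minimal sketch of the idr range allocation that cls_u32 now relies on,
 * for illustration only (not part of the patch; it assumes the 4.14-era
 * idr_alloc_ext() API used above):
 *
 *	static int demo_alloc_handle(struct idr *idr, void *ptr)
 *	{
 *		unsigned long index;
 *		int err;
 *
 *		// Take the lowest free slot in [0x800, 0x1000); on success
 *		// 'index' holds the chosen value and 0 is returned, so no
 *		// manual bitmap scan for collisions is needed.
 *		err = idr_alloc_ext(idr, ptr, &index, 0x800, 0x1000,
 *				    GFP_KERNEL);
 *		return err ? err : (int)index;
 *	}
 *
 * gen_new_htid() and gen_new_kid() above follow this pattern in place of
 * the old hgenerator counter and bitmap search.
 */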
module_init(init_u32) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 03b677bc0700..1331a4c2d8ff 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp, struct tcf_ematch_hdr *em_hdr = nla_data(nla); int data_len = nla_len(nla) - sizeof(*em_hdr); void *data = (void *) em_hdr + sizeof(*em_hdr); - struct net *net = dev_net(qdisc_dev(tp->q)); + struct net *net = tp->chain->block->net; if (!TCF_EM_REL_VALID(em_hdr->flags)) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bd24a550e0f9..b6c4f536876b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -35,13 +35,7 @@ #include <net/sock.h> #include <net/netlink.h> #include <net/pkt_sched.h> - -static int qdisc_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, u32 clid, - struct Qdisc *old, struct Qdisc *new); -static int tclass_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct Qdisc *q, - unsigned long cl, int event); +#include <net/pkt_cls.h> /* @@ -160,7 +154,7 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->cl_ops) { const struct Qdisc_class_ops *cops = qops->cl_ops; - if (!(cops->get && cops->put && cops->walk && cops->leaf)) + if (!(cops->find && cops->walk && cops->leaf)) goto out_einval; if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) @@ -286,9 +280,6 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - struct Qdisc *root = qdisc_dev(q)->qdisc; - - WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); if (invisible) @@ -310,6 +301,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; + if (!handle) + return NULL; q = qdisc_match_from_root(dev->qdisc, handle); if (q) goto out; @@ -330,12 +323,11 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) if (cops == NULL) return NULL; - cl = cops->get(p, classid); + cl = cops->find(p, classid); if (cl == 0) return NULL; leaf = cops->leaf(p, cl); - cops->put(p, cl); return leaf; } @@ -624,14 +616,10 @@ EXPORT_SYMBOL(qdisc_watchdog_cancel); static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { - unsigned int size = n * sizeof(struct hlist_head), i; struct hlist_head *h; + unsigned int i; - if (size <= PAGE_SIZE) - h = kmalloc(size, GFP_KERNEL); - else - h = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(size)); + h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL); if (h != NULL) { for (i = 0; i < n; i++) @@ -640,16 +628,6 @@ static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) return h; } -static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) -{ - unsigned int size = n * sizeof(struct hlist_head); - - if (size <= PAGE_SIZE) - kfree(h); - else - free_pages((unsigned long)h, get_order(size)); -} - void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; @@ -682,7 +660,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) clhash->hashmask = nmask; sch_tree_unlock(sch); - qdisc_class_hash_free(ohash, osize); + kvfree(ohash); } EXPORT_SYMBOL(qdisc_class_hash_grow); @@ -702,7 +680,7 @@ EXPORT_SYMBOL(qdisc_class_hash_init); void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) { - qdisc_class_hash_free(clhash->hash, 
clhash->hashsize); + kvfree(clhash->hash); } EXPORT_SYMBOL(qdisc_class_hash_destroy); @@ -752,6 +730,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; + bool notify; int drops; if (n == 0 && len == 0) @@ -764,6 +743,13 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, if (sch->flags & TCQ_F_NOPARENT) break; + /* Notify parent qdisc only if child qdisc becomes empty. + * + * If child was empty even before update then backlog + * counter is screwed and we skip notification because + * parent class is already passive. + */ + notify = !sch->q.qlen && !WARN_ON_ONCE(!n); /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -771,10 +757,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, break; } cops = sch->ops->cl_ops; - if (cops->qlen_notify) { - cl = cops->get(sch, parentid); + if (notify && cops->qlen_notify) { + cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); - cops->put(sch, cl); } sch->q.qlen -= n; sch->qstats.backlog -= len; @@ -784,6 +769,111 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, } EXPORT_SYMBOL(qdisc_tree_reduce_backlog); +static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, + u32 portid, u32 seq, u16 flags, int event) +{ + struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; + struct gnet_stats_queue __percpu *cpu_qstats = NULL; + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb_tail_pointer(skb); + struct gnet_dump d; + struct qdisc_size_table *stab; + __u32 qlen; + + cond_resched(); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; + tcm->tcm_parent = clid; + tcm->tcm_handle = q->handle; + tcm->tcm_info = refcount_read(&q->refcnt); + if (nla_put_string(skb, TCA_KIND, q->ops->id)) + goto nla_put_failure; + if (q->ops->dump && q->ops->dump(q, skb) < 0) + goto nla_put_failure; + qlen = q->q.qlen; + + stab = rtnl_dereference(q->stab); + if (stab && qdisc_dump_stab(skb, stab) < 0) + goto nla_put_failure; + + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + NULL, &d, TCA_PAD) < 0) + goto nla_put_failure; + + if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) + goto nla_put_failure; + + if (qdisc_is_percpu_stats(q)) { + cpu_bstats = q->cpu_bstats; + cpu_qstats = q->cpu_qstats; + } + + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), + &d, cpu_bstats, &q->bstats) < 0 || + gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || + gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) + goto nla_put_failure; + + if (gnet_stats_finish_copy(&d) < 0) + goto nla_put_failure; + + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +out_nlmsg_trim: +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) +{ + if (q->flags & TCQ_F_BUILTIN) + return true; + if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) + return true; + + return false; +} + +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) +{ + struct sk_buff *skb; + u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (old && !tc_qdisc_dump_ignore(old, false)) { + if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, + 0, RTM_DELQDISC) < 0) + goto err_out; + } + if (new && !tc_qdisc_dump_ignore(new, false)) { + if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, + old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) + goto err_out; + } + + if (skb->len) + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + +err_out: + kfree_skb(skb); + return -EINVAL; +} + static void notify_and_destroy(struct net *net, struct sk_buff *skb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new) @@ -839,7 +929,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, old = dev_graft_qdisc(dev_queue, new); if (new && i > 0) - refcount_inc(&new->refcnt); + qdisc_refcount_inc(new); if (!ingress) qdisc_destroy(old); @@ -850,7 +940,7 @@ skip: notify_and_destroy(net, skb, n, classid, dev->qdisc, new); if (new && !new->ops->attach) - refcount_inc(&new->refcnt); + qdisc_refcount_inc(new); dev->qdisc = new ? : &noop_qdisc; if (new && new->ops->attach) @@ -866,11 +956,11 @@ skip: err = -EOPNOTSUPP; if (cops && cops->graft) { - unsigned long cl = cops->get(parent, classid); - if (cl) { + unsigned long cl = cops->find(parent, classid); + + if (cl) err = cops->graft(parent, cl, new, &old); - cops->put(parent, cl); - } else + else err = -ENOENT; } if (!err) @@ -1259,7 +1349,7 @@ replay: if (q == p || (p && check_loop(q, p, 0))) return -ELOOP; - refcount_inc(&q->refcnt); + qdisc_refcount_inc(q); goto graft; } else { if (!q) @@ -1351,111 +1441,6 @@ graft: return 0; } -static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 portid, u32 seq, u16 flags, int event) -{ - struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; - struct gnet_stats_queue __percpu *cpu_qstats = NULL; - struct tcmsg *tcm; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - struct gnet_dump d; - struct qdisc_size_table *stab; - __u32 qlen; - - cond_resched(); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); - if (!nlh) - goto out_nlmsg_trim; - tcm = nlmsg_data(nlh); - tcm->tcm_family = AF_UNSPEC; - tcm->tcm__pad1 = 0; - tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(q)->ifindex; - tcm->tcm_parent = clid; - tcm->tcm_handle = q->handle; - tcm->tcm_info = refcount_read(&q->refcnt); - if (nla_put_string(skb, TCA_KIND, q->ops->id)) - goto nla_put_failure; - if (q->ops->dump && q->ops->dump(q, skb) < 0) - goto nla_put_failure; - qlen = q->q.qlen; - - stab = rtnl_dereference(q->stab); - if (stab && qdisc_dump_stab(skb, stab) < 0) - goto nla_put_failure; - - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - NULL, &d, TCA_PAD) < 0) - goto nla_put_failure; - - if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) - goto nla_put_failure; - - if (qdisc_is_percpu_stats(q)) { - cpu_bstats = q->cpu_bstats; - cpu_qstats = q->cpu_qstats; - } - - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), - &d, cpu_bstats, &q->bstats) < 0 || - gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || - gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) - goto nla_put_failure; - - if (gnet_stats_finish_copy(&d) < 0) - goto nla_put_failure; - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -out_nlmsg_trim: -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static bool tc_qdisc_dump_ignore(struct 
Qdisc *q, bool dump_invisible) -{ - if (q->flags & TCQ_F_BUILTIN) - return true; - if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) - return true; - - return false; -} - -static int qdisc_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, u32 clid, - struct Qdisc *old, struct Qdisc *new) -{ - struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (old && !tc_qdisc_dump_ignore(old, false)) { - if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, - 0, RTM_DELQDISC) < 0) - goto err_out; - } - if (new && !tc_qdisc_dump_ignore(new, false)) { - if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, - old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) - goto err_out; - } - - if (skb->len) - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - -err_out: - kfree_skb(skb); - return -EINVAL; -} - static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx, bool recur, @@ -1517,7 +1502,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int s_idx, s_q_idx; struct net_device *dev; const struct nlmsghdr *nlh = cb->nlh; - struct tcmsg *tcm = nlmsg_data(nlh); struct nlattr *tca[TCA_MAX + 1]; int err; @@ -1527,7 +1511,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); if (err < 0) return err; @@ -1568,7 +1552,163 @@ done: * Traffic classes manipulation. * ************************************************/ +static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, + unsigned long cl, + u32 portid, u32 seq, u16 flags, int event) +{ + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb_tail_pointer(skb); + struct gnet_dump d; + const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; + + cond_resched(); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; + tcm->tcm_parent = q->handle; + tcm->tcm_handle = q->handle; + tcm->tcm_info = 0; + if (nla_put_string(skb, TCA_KIND, q->ops->id)) + goto nla_put_failure; + if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) + goto nla_put_failure; + + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + NULL, &d, TCA_PAD) < 0) + goto nla_put_failure; + + if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) + goto nla_put_failure; + + if (gnet_stats_finish_copy(&d) < 0) + goto nla_put_failure; + + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +out_nlmsg_trim: +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event) +{ + struct sk_buff *skb; + u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { + kfree_skb(skb); + return -EINVAL; + } + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +static int tclass_del_notify(struct net *net, + const struct Qdisc_class_ops *cops, + struct sk_buff *oskb, struct nlmsghdr *n, + struct Qdisc *q, unsigned long cl) +{ + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct sk_buff *skb; + int err = 0; + + if (!cops->delete) + return -EOPNOTSUPP; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, + RTM_DELTCLASS) < 0) { + kfree_skb(skb); + return -EINVAL; + } + + err = cops->delete(q, cl); + if (err) { + kfree_skb(skb); + return err; + } + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +#ifdef CONFIG_NET_CLS + +struct tcf_bind_args { + struct tcf_walker w; + u32 classid; + unsigned long cl; +}; + +static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) +{ + struct tcf_bind_args *a = (void *)arg; + + if (tp->ops->bind_class) { + struct Qdisc *q = tcf_block_q(tp->chain->block); + + sch_tree_lock(q); + tp->ops->bind_class(n, a->classid, a->cl); + sch_tree_unlock(q); + } + return 0; +} + +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, + unsigned long new_cl) +{ + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + struct tcf_block *block; + struct tcf_chain *chain; + unsigned long cl; + + cl = cops->find(q, portid); + if (!cl) + return; + block = cops->tcf_block(q, cl); + if (!block) + return; + list_for_each_entry(chain, &block->chain_list, list) { + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) { + struct tcf_bind_args arg = {}; + + arg.w.fn = tcf_node_bind; + arg.classid = clid; + arg.cl = new_cl; + tp->ops->walk(tp, &arg.w); + } + } +} +#else + +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, + unsigned long new_cl) +{ +} + +#endif static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) @@ -1659,7 +1799,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, clid = TC_H_MAKE(qid, clid); if (clid) - cl = cops->get(q, clid); + cl = cops->find(q, clid); if (cl == 0) { err = -ENOENT; @@ -1674,12 +1814,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, goto out; break; case RTM_DELTCLASS: - err = -EOPNOTSUPP; - if (cops->delete) - err = cops->delete(q, cl); - if (err == 0) - tclass_notify(net, skb, n, q, cl, - RTM_DELTCLASS); + err = tclass_del_notify(net, cops, skb, n, q, cl); + /* Unbind the class from its filters by rebinding them to 0 */ + tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); @@ -1694,83 +1831,16 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, err = -EOPNOTSUPP; if (cops->change) err = cops->change(q, clid, portid, tca, &new_cl); - if (err == 0) + if (err == 0) { tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); - + /* We just created a new class, so we need to do reverse binding,
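+ * that is, walk every filter on the qdisc's block and re-point results whose classid matches at the new class (see tc_bind_tclass() above).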
*/ + if (cl != new_cl) + tc_bind_tclass(q, portid, clid, new_cl); + } out: - if (cl) - cops->put(q, cl); - return err; } - -static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, - unsigned long cl, - u32 portid, u32 seq, u16 flags, int event) -{ - struct tcmsg *tcm; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - struct gnet_dump d; - const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - - cond_resched(); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); - if (!nlh) - goto out_nlmsg_trim; - tcm = nlmsg_data(nlh); - tcm->tcm_family = AF_UNSPEC; - tcm->tcm__pad1 = 0; - tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(q)->ifindex; - tcm->tcm_parent = q->handle; - tcm->tcm_handle = q->handle; - tcm->tcm_info = 0; - if (nla_put_string(skb, TCA_KIND, q->ops->id)) - goto nla_put_failure; - if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) - goto nla_put_failure; - - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - NULL, &d, TCA_PAD) < 0) - goto nla_put_failure; - - if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) - goto nla_put_failure; - - if (gnet_stats_finish_copy(&d) < 0) - goto nla_put_failure; - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -out_nlmsg_trim: -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int tclass_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct Qdisc *q, - unsigned long cl, int event) -{ - struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { - kfree_skb(skb); - return -EINVAL; - } - - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); -} - struct qdisc_dump_args { struct qdisc_walker w; struct sk_buff *skb; @@ -1952,14 +2022,14 @@ static int __init pktsched_init(void) register_qdisc(&mq_qdisc_ops); register_qdisc(&noqueue_qdisc_ops); - rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, - NULL); - rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); + 0); + rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, - NULL); + 0); return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 572fe2584e48..2dbd249c0b2f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -41,6 +41,7 @@ #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) struct atm_flow_data { + struct Qdisc_class_common common; struct Qdisc *q; /* FIFO, TBF, etc. 
*/ struct tcf_proto __rcu *filter_list; struct tcf_block *block; @@ -49,7 +50,6 @@ struct atm_flow_data { struct sk_buff *skb); /* chaining */ struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ - u32 classid; /* x:y type ID */ int ref; /* reference count */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; @@ -75,7 +75,7 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) struct atm_flow_data *flow; list_for_each_entry(flow, &p->flows, list) { - if (flow->classid == classid) + if (flow->common.classid == classid) return flow; } return NULL; @@ -108,23 +108,29 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) return flow ? flow->q : NULL; } -static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid) +static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid) { struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); struct atm_flow_data *flow; - pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid); + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); flow = lookup_flow(sch, classid); - if (flow) - flow->ref++; - pr_debug("atm_tc_get: flow %p\n", flow); + pr_debug("%s: flow %p\n", __func__, flow); return (unsigned long)flow; } static unsigned long atm_tc_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - return atm_tc_get(sch, classid); + struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); + struct atm_flow_data *flow; + + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); + flow = lookup_flow(sch, classid); + if (flow) + flow->ref++; + pr_debug("%s: flow %p\n", __func__, flow); + return (unsigned long)flow; } /* @@ -234,7 +240,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, excess = NULL; else { excess = (struct atm_flow_data *) - atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); + atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); if (!excess) return -ENOENT; } @@ -262,10 +268,9 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, for (i = 1; i < 0x8000; i++) { classid = TC_H_MAKE(sch->handle, 0x8000 | i); - cl = atm_tc_get(sch, classid); + cl = atm_tc_find(sch, classid); if (!cl) break; - atm_tc_put(sch, cl); } } pr_debug("atm_tc_change: new id %x\n", classid); @@ -276,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } - error = tcf_block_get(&flow->block, &flow->filter_list); + error = tcf_block_get(&flow->block, &flow->filter_list, sch); if (error) { kfree(flow); goto err_out; @@ -293,7 +298,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, flow->old_pop = flow->vcc->pop; flow->parent = p; flow->vcc->pop = sch_atm_pop; - flow->classid = classid; + flow->common.classid = classid; flow->ref = 1; flow->excess = excess; list_add(&flow->list, &p->link.list); @@ -305,8 +310,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, *arg = (unsigned long)flow; return 0; err_out: - if (excess) - atm_tc_put(sch, (unsigned long)excess); sockfd_put(sock); return error; } @@ -377,7 +380,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, result = TC_ACT_OK; /* be nice to gcc */ flow = NULL; if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) { + !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) { struct tcf_proto *fl; list_for_each_entry(flow, &p->flows, list) 
{ @@ -543,13 +546,13 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - err = tcf_block_get(&p->link.block, &p->link.filter_list); + err = tcf_block_get(&p->link.block, &p->link.filter_list, sch); if (err) return err; p->link.vcc = NULL; p->link.sock = NULL; - p->link.classid = sch->handle; + p->link.common.classid = sch->handle; p->link.ref = 1; tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; @@ -572,8 +575,10 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow, *tmp; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) + list_for_each_entry(flow, &p->flows, list) { tcf_block_put(flow->block); + flow->block = NULL; + } list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) @@ -594,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, sch, p, flow, skb, tcm); if (list_empty(&flow->list)) return -EINVAL; - tcm->tcm_handle = flow->classid; + tcm->tcm_handle = flow->common.classid; tcm->tcm_info = flow->q->handle; nest = nla_nest_start(skb, TCA_OPTIONS); @@ -619,7 +624,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, goto nla_put_failure; } if (flow->excess) { - if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->classid)) + if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) goto nla_put_failure; } else { if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) @@ -653,8 +658,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) static const struct Qdisc_class_ops atm_class_ops = { .graft = atm_tc_graft, .leaf = atm_tc_leaf, - .get = atm_tc_get, - .put = atm_tc_put, + .find = atm_tc_find, .change = atm_tc_change, .delete = atm_tc_delete, .walk = atm_tc_walk, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 481036f6b54e..6361be7881f1 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -129,7 +129,6 @@ struct cbq_class { struct tcf_proto __rcu *filter_list; struct tcf_block *block; - int refcnt; int filters; struct cbq_class *defaults[TC_PRIO_MAX + 1]; @@ -256,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; case TC_ACT_RECLASSIFY: @@ -1139,6 +1139,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) struct tc_ratespec *r; int err; + qdisc_watchdog_init(&q->watchdog, sch); + hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + q->delay_timer.function = cbq_undelay; + + if (!opt) + return -EINVAL; + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); if (err < 0) return err; @@ -1155,7 +1162,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if (err < 0) goto put_rtab; - q->link.refcnt = 1; q->link.sibling = &q->link; q->link.common.classid = sch->handle; q->link.qdisc = sch; @@ -1177,9 +1183,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.avpkt = q->link.allot/2; q->link.minidle = -0x7FFFFFFF; - qdisc_watchdog_init(&q->watchdog, sch); - hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); - q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); @@ -1385,20 +1388,14 @@ static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct cbq_class *cl = (struct cbq_class *)arg; - if (cl->q->q.qlen == 0) 
- cbq_deactivate_class(cl); + cbq_deactivate_class(cl); } -static unsigned long cbq_get(struct Qdisc *sch, u32 classid) +static unsigned long cbq_find(struct Qdisc *sch, u32 classid) { struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = cbq_class_lookup(q, classid); - if (cl) { - cl->refcnt++; - return (unsigned long)cl; - } - return 0; + return (unsigned long)cbq_class_lookup(q, classid); } static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) @@ -1431,8 +1428,10 @@ static void cbq_destroy(struct Qdisc *sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], @@ -1442,25 +1441,6 @@ static void cbq_destroy(struct Qdisc *sch) qdisc_class_hash_destroy(&q->clhash); } -static void cbq_put(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - if (--cl->refcnt == 0) { -#ifdef CONFIG_NET_CLS_ACT - spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); - struct cbq_sched_data *q = qdisc_priv(sch); - - spin_lock_bh(root_lock); - if (q->rx_class == cl) - q->rx_class = NULL; - spin_unlock_bh(root_lock); -#endif - - cbq_destroy_class(sch, cl); - } -} - static int cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, unsigned long *arg) @@ -1587,7 +1567,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); if (err) { kfree(cl); return err; @@ -1607,7 +1587,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; - cl->refcnt = 1; cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!cl->q) cl->q = &noop_qdisc; @@ -1688,12 +1667,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) cbq_rmprio(q, cl); sch_tree_unlock(sch); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ - + cbq_destroy_class(sch, cl); return 0; } @@ -1759,8 +1733,7 @@ static const struct Qdisc_class_ops cbq_class_ops = { .graft = cbq_graft, .leaf = cbq_leaf, .qlen_notify = cbq_qlen_notify, - .get = cbq_get, - .put = cbq_put, + .find = cbq_find, .change = cbq_change_class, .delete = cbq_delete, .walk = cbq_walk, diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c new file mode 100644 index 000000000000..7a72980c1509 --- /dev/null +++ b/net/sched/sch_cbs.c @@ -0,0 +1,373 @@ +/* + * net/sched/sch_cbs.c Credit Based Shaper + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> + * + */ + +/* Credit Based Shaper (CBS) + * ========================= + * + * This is a simple rate-limiting shaper aimed at TSN applications on + * systems with known traffic workloads. 
+ * + * Its algorithm is defined by the IEEE 802.1Q-2014 Specification, + * Section 8.6.8.2, and explained in more detail in Annex L of the + * same specification. + * + * There are four tunables to be considered: + * + * 'idleslope': Idleslope is the rate of credits that is + * accumulated (in kilobits per second) when there is at least + * one packet waiting for transmission. Packets are transmitted + * when the current value of credits is equal to or greater than + * zero. When there is no packet to be transmitted, the amount of + * credits is set to zero. This is the main tunable of the CBS + * algorithm. + * + * 'sendslope': + * Sendslope is the rate of credits that is depleted (it should be a + * negative number of kilobits per second) when a transmission is + * occurring. It can be calculated as follows (IEEE 802.1Q-2014 Section + * 8.6.8.2, item g): + * + * sendslope = idleslope - port_transmit_rate + * + * For example, an idleslope of 20000 kbit/s on a 100 Mbit/s port + * yields sendslope = 20000 - 100000 = -80000 kbit/s. + * + * 'hicredit': Hicredit defines the maximum amount of credits (in + * bytes) that can be accumulated. Hicredit depends on the + * characteristics of interfering traffic; + * 'max_interference_size' is the maximum size of any burst of + * traffic that can delay the transmission of a frame that is + * available for transmission for this traffic class (IEEE + * 802.1Q-2014 Annex L, Equation L-3): + * + * hicredit = max_interference_size * (idleslope / port_transmit_rate) + * + * 'locredit': Locredit is the minimum amount of credits that can + * be reached. It is a function of the traffic flowing through + * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2): + * + * locredit = max_frame_size * (sendslope / port_transmit_rate) + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/netlink.h> +#include <net/sch_generic.h> +#include <net/pkt_sched.h> + +#define BYTES_PER_KBIT (1000LL / 8) + +struct cbs_sched_data { + bool offload; + int queue; + s64 port_rate; /* in bytes/s */ + s64 last; /* timestamp in ns */ + s64 credits; /* in bytes */ + s32 locredit; /* in bytes */ + s32 hicredit; /* in bytes */ + s64 sendslope; /* in bytes/s */ + s64 idleslope; /* in bytes/s */ + struct qdisc_watchdog watchdog; + int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch); + struct sk_buff *(*dequeue)(struct Qdisc *sch); +}; + +static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch) +{ + return qdisc_enqueue_tail(skb, sch); +} + +static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + if (sch->q.qlen == 0 && q->credits > 0) { + /* We need to stop accumulating credits when there are + * no enqueued packets and q->credits is positive.
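+ * (This mirrors IEEE 802.1Q-2014 8.6.8.2: positive credit is discarded once the queue goes idle, while negative credit is retained so the debt from the last transmission is still paid back before the next one may start.)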
+ */ + q->credits = 0; + q->last = ktime_get_ns(); + } + + return qdisc_enqueue_tail(skb, sch); +} + +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + return q->enqueue(skb, sch); +} + +/* timediff is in ns, slope is in bytes/s */ +static s64 timediff_to_credits(s64 timediff, s64 slope) +{ + return div64_s64(timediff * slope, NSEC_PER_SEC); +} + +static s64 delay_from_credits(s64 credits, s64 slope) +{ + if (unlikely(slope == 0)) + return S64_MAX; + + return div64_s64(-credits * NSEC_PER_SEC, slope); +} + +static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) +{ + if (unlikely(port_rate == 0)) + return S64_MAX; + + return div64_s64(len * slope, port_rate); +} + +static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + s64 now = ktime_get_ns(); + struct sk_buff *skb; + s64 credits; + int len; + + if (q->credits < 0) { + credits = timediff_to_credits(now - q->last, q->idleslope); + + credits = q->credits + credits; + q->credits = min_t(s64, credits, q->hicredit); + + if (q->credits < 0) { + s64 delay; + + delay = delay_from_credits(q->credits, q->idleslope); + qdisc_watchdog_schedule_ns(&q->watchdog, now + delay); + + q->last = now; + + return NULL; + } + } + + skb = qdisc_dequeue_head(sch); + if (!skb) + return NULL; + + len = qdisc_pkt_len(skb); + + /* As sendslope is a negative number, this will decrease the + * amount of q->credits. + */ + credits = credits_from_len(len, q->sendslope, q->port_rate); + credits += q->credits; + + q->credits = max_t(s64, credits, q->locredit); + q->last = now; + + return skb; +} + +static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch) +{ + return qdisc_dequeue_head(sch); +} + +static struct sk_buff *cbs_dequeue(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + return q->dequeue(sch); +} + +static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { + [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, +}; + +static void cbs_disable_offload(struct net_device *dev, + struct cbs_sched_data *q) +{ + struct tc_cbs_qopt_offload cbs = { }; + const struct net_device_ops *ops; + int err; + + if (!q->offload) + return; + + q->enqueue = cbs_enqueue_soft; + q->dequeue = cbs_dequeue_soft; + + ops = dev->netdev_ops; + if (!ops->ndo_setup_tc) + return; + + cbs.queue = q->queue; + cbs.enable = 0; + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); + if (err < 0) + pr_warn("Couldn't disable CBS offload for queue %d\n", + cbs.queue); +} + +static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, + const struct tc_cbs_qopt *opt) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_cbs_qopt_offload cbs = { }; + int err; + + if (!ops->ndo_setup_tc) + return -EOPNOTSUPP; + + cbs.queue = q->queue; + + cbs.enable = 1; + cbs.hicredit = opt->hicredit; + cbs.locredit = opt->locredit; + cbs.idleslope = opt->idleslope; + cbs.sendslope = opt->sendslope; + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); + if (err < 0) + return err; + + q->enqueue = cbs_enqueue_offload; + q->dequeue = cbs_dequeue_offload; + + return 0; +} + +static int cbs_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct nlattr *tb[TCA_CBS_MAX + 1]; + struct tc_cbs_qopt *qopt; + int err; + + err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL); + if (err < 0) 
+ return err; + + if (!tb[TCA_CBS_PARMS]) + return -EINVAL; + + qopt = nla_data(tb[TCA_CBS_PARMS]); + + if (!qopt->offload) { + struct ethtool_link_ksettings ecmd; + s64 link_speed; + + if (!__ethtool_get_link_ksettings(dev, &ecmd)) + link_speed = ecmd.base.speed; + else + link_speed = SPEED_1000; + + q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; + + cbs_disable_offload(dev, q); + } else { + err = cbs_enable_offload(dev, q, qopt); + if (err < 0) + return err; + } + + /* Everything went OK, save the parameters used. */ + q->hicredit = qopt->hicredit; + q->locredit = qopt->locredit; + q->idleslope = qopt->idleslope * BYTES_PER_KBIT; + q->sendslope = qopt->sendslope * BYTES_PER_KBIT; + q->offload = qopt->offload; + + return 0; +} + +static int cbs_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + + if (!opt) + return -EINVAL; + + q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); + + q->enqueue = cbs_enqueue_soft; + q->dequeue = cbs_dequeue_soft; + + qdisc_watchdog_init(&q->watchdog, sch); + + return cbs_change(sch, opt); +} + +static void cbs_destroy(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + + qdisc_watchdog_cancel(&q->watchdog); + + cbs_disable_offload(dev, q); +} + +static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct tc_cbs_qopt opt = { }; + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + opt.hicredit = q->hicredit; + opt.locredit = q->locredit; + opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); + opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT); + opt.offload = q->offload; + + if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { + .id = "cbs", + .priv_size = sizeof(struct cbs_sched_data), + .enqueue = cbs_enqueue, + .dequeue = cbs_dequeue, + .peek = qdisc_peek_dequeued, + .init = cbs_init, + .reset = qdisc_reset_queue, + .destroy = cbs_destroy, + .change = cbs_change, + .dump = cbs_dump, + .owner = THIS_MODULE, +}; + +static int __init cbs_module_init(void) +{ + return register_qdisc(&cbs_qdisc_ops); +} + +static void __exit cbs_module_exit(void) +{ + unregister_qdisc(&cbs_qdisc_ops); +} +module_init(cbs_module_init) +module_exit(cbs_module_exit) +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a413dc1c2098..5bbcef3dcd8c 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -20,7 +20,6 @@ struct drr_class { struct Qdisc_class_common common; - unsigned int refcnt; unsigned int filter_cnt; struct gnet_stats_basic_packed bstats; @@ -111,7 +110,6 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - cl->refcnt = 1; cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -163,32 +161,15 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) drr_purge_queue(cl); qdisc_class_hash_remove(&q->clhash, &cl->common); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). 
- */ - sch_tree_unlock(sch); - return 0; -} - -static unsigned long drr_get_class(struct Qdisc *sch, u32 classid) -{ - struct drr_class *cl = drr_find_class(sch, classid); - if (cl != NULL) - cl->refcnt++; - - return (unsigned long)cl; + drr_destroy_class(sch, cl); + return 0; } -static void drr_put_class(struct Qdisc *sch, unsigned long arg) +static unsigned long drr_search_class(struct Qdisc *sch, u32 classid) { - struct drr_class *cl = (struct drr_class *)arg; - - if (--cl->refcnt == 0) - drr_destroy_class(sch, cl); + return (unsigned long)drr_find_class(sch, classid); } static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl) @@ -246,8 +227,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) { struct drr_class *cl = (struct drr_class *)arg; - if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del(&cl->alist); } static int drr_dump_class(struct Qdisc *sch, unsigned long arg, @@ -341,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -432,7 +413,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) struct drr_sched *q = qdisc_priv(sch); int err; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; err = qdisc_class_hash_init(&q->clhash); @@ -479,8 +460,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch) static const struct Qdisc_class_ops drr_class_ops = { .change = drr_change_class, .delete = drr_delete_class, - .get = drr_get_class, - .put = drr_put_class, + .find = drr_search_class, .tcf_block = drr_tcf_block, .bind_tcf = drr_bind_tcf, .unbind_tcf = drr_unbind_tcf, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 6d94fcc3592a..fb4fb71c68cf 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -85,21 +85,21 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) return p->q; } -static unsigned long dsmark_get(struct Qdisc *sch, u32 classid) +static unsigned long dsmark_find(struct Qdisc *sch, u32 classid) { - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", - __func__, sch, qdisc_priv(sch), classid); - return TC_H_MIN(classid) + 1; } static unsigned long dsmark_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - return dsmark_get(sch, classid); + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", + __func__, sch, qdisc_priv(sch), classid); + + return dsmark_find(sch, classid); } -static void dsmark_put(struct Qdisc *sch, unsigned long cl) +static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl) { } @@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) goto errout; - err = tcf_block_get(&p->block, &p->filter_list); + err = tcf_block_get(&p->block, &p->filter_list, sch); if (err) return err; @@ -469,14 +469,13 @@ nla_put_failure: static const struct Qdisc_class_ops dsmark_class_ops = { .graft = dsmark_graft, .leaf = dsmark_leaf, - .get = dsmark_get, - .put = dsmark_put, + .find = dsmark_find, .change = dsmark_change, .delete = dsmark_delete, .walk = dsmark_walk, .tcf_block = dsmark_tcf_block, .bind_tcf = dsmark_bind_filter, - .unbind_tcf = dsmark_put, + .unbind_tcf = dsmark_unbind_filter, .dump = dsmark_dump_class, }; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 337f2d6d81e4..0305d791ea94 100644 --- a/net/sched/sch_fq_codel.c +++ 
b/net/sched/sch_fq_codel.c @@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return 0; } @@ -481,7 +482,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) return err; } - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -491,10 +492,8 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) if (!q->flows) return -ENOMEM; q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL); - if (!q->backlogs) { - kvfree(q->flows); + if (!q->backlogs) return -ENOMEM; - } for (i = 0; i < q->flows_cnt; i++) { struct fq_codel_flow *flow = q->flows + i; @@ -579,7 +578,7 @@ static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg) return NULL; } -static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid) +static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid) { return 0; } @@ -592,7 +591,7 @@ static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent, return 0; } -static void fq_codel_put(struct Qdisc *q, unsigned long cl) +static void fq_codel_unbind(struct Qdisc *q, unsigned long cl) { } @@ -683,11 +682,10 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg) static const struct Qdisc_class_ops fq_codel_class_ops = { .leaf = fq_codel_leaf, - .get = fq_codel_get, - .put = fq_codel_put, + .find = fq_codel_find, .tcf_block = fq_codel_tcf_block, .bind_tcf = fq_codel_bind, - .unbind_tcf = fq_codel_put, + .unbind_tcf = fq_codel_unbind, .dump = fq_codel_dump_class, .dump_stats = fq_codel_dump_class_stats, .walk = fq_codel_walk, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 57ba406f1437..3839cbbdc32b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -29,6 +29,7 @@ #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> +#include <trace/events/qdisc.h> /* Qdisc to use by default */ const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; @@ -126,7 +127,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, q->q.qlen--; } else skb = NULL; - return skb; + goto trace; } *validate = true; skb = q->skb_bad_txq; @@ -139,7 +140,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, q->q.qlen--; goto bulk; } - return NULL; + skb = NULL; + goto trace; } if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) @@ -151,6 +153,8 @@ bulk: else try_bulk_dequeue_skb_slow(q, skb, packets); } +trace: + trace_qdisc_dequeue(q, txq, *packets, skb); return skb; } @@ -284,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev) } EXPORT_SYMBOL(dev_trans_start); -static void dev_watchdog(unsigned long arg) +static void dev_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; + struct net_device *dev = from_timer(dev, t, watchdog_timer); netif_tx_lock(dev); if (!qdisc_tx_is_noop(dev)) { @@ -599,8 +603,14 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc *sch; unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; int err = -ENOBUFS; - struct net_device *dev = dev_queue->dev; + struct net_device *dev; + if (!dev_queue) { + err = -EINVAL; + goto errout; + } + + dev = dev_queue->dev; p = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue)); @@ -681,13 +691,12 @@ void 
qdisc_reset(struct Qdisc *qdisc) qdisc->gso_skb = NULL; } qdisc->q.qlen = 0; + qdisc->qstats.backlog = 0; } EXPORT_SYMBOL(qdisc_reset); -static void qdisc_rcu_free(struct rcu_head *head) +static void qdisc_free(struct Qdisc *qdisc) { - struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); - if (qdisc_is_percpu_stats(qdisc)) { free_percpu(qdisc->cpu_bstats); free_percpu(qdisc->cpu_qstats); @@ -720,11 +729,7 @@ void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(qdisc->gso_skb); kfree_skb(qdisc->skb_bad_txq); - /* - * gen_estimator est_timer() might access qdisc->q.lock, - * wait a RCU grace period before freeing qdisc. - */ - call_rcu(&qdisc->rcu_head, qdisc_rcu_free); + qdisc_free(qdisc); } EXPORT_SYMBOL(qdisc_destroy); @@ -785,7 +790,7 @@ static void attach_default_qdiscs(struct net_device *dev) dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); dev->qdisc = txq->qdisc_sleeping; - refcount_inc(&dev->qdisc->refcnt); + qdisc_refcount_inc(dev->qdisc); } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); if (qdisc) { @@ -955,7 +960,7 @@ void dev_init_scheduler(struct net_device *dev) if (dev_ingress_queue(dev)) dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); + timer_setup(&dev->watchdog_timer, dev_watchdog, 0); } static void shutdown_scheduler_queue(struct net_device *dev, @@ -1019,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, } } EXPORT_SYMBOL(psched_ratecfg_precompute); + +static void mini_qdisc_rcu_func(struct rcu_head *head) +{ +} + +void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, + struct tcf_proto *tp_head) +{ + struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq); + struct mini_Qdisc *miniq; + + if (!tp_head) { + RCU_INIT_POINTER(*miniqp->p_miniq, NULL); + return; + } + + miniq = !miniq_old || miniq_old == &miniqp->miniq2 ? + &miniqp->miniq1 : &miniqp->miniq2; + + /* We need to make sure that readers won't see the miniq + * we are about to modify. So wait until previous call_rcu_bh callback + * is done. + */ + rcu_barrier_bh(); + miniq->filter_list = tp_head; + rcu_assign_pointer(*miniqp->p_miniq, miniq); + + if (miniq_old) + /* This is the counterpart of the rcu barrier above. We need to + * block any potential new user of miniq_old until all readers + * have stopped seeing it.
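mini_qdisc_pair_swap() is an RCU double buffer: the pair embeds two mini_Qdisc slots and publication alternates between them, the rcu_barrier_bh() above guarantees that the slot about to be rewritten has drained all readers left over from the previous flip, and the empty mini_qdisc_rcu_func() callback queued below exists only to mark when the retired slot becomes reusable. A self-contained sketch of the slot-flipping logic, with the RCU primitives reduced to comments and invented struct names:

#include <stdio.h>

struct buf { int data; };

struct pair {
	struct buf b1, b2;
	struct buf *published;	/* readers dereference this (RCU-protected in the kernel) */
};

/* Publish new data without ever mutating the buffer a reader may
 * still hold: always rewrite the slot that is not published.
 */
static void pair_swap(struct pair *p, int data)
{
	struct buf *old = p->published;
	struct buf *next = (!old || old == &p->b2) ? &p->b1 : &p->b2;

	/* kernel: rcu_barrier_bh() - 'next' may still have readers
	 * from the flip before last; wait for them to finish
	 */
	next->data = data;
	p->published = next;	/* kernel: rcu_assign_pointer() */
	/* kernel: call_rcu_bh(&old->rcu, noop) - 'old' is reusable
	 * once a grace period has elapsed
	 */
}

int main(void)
{
	struct pair p = { .published = NULL };

	pair_swap(&p, 1);
	pair_swap(&p, 2);
	printf("published %d via slot %s\n", p.published->data,
	       p.published == &p.b1 ? "b1" : "b2");
	return 0;
}

A concurrent reader dereferences the published pointer once and sees either the old or the new filter list in full, never a half-updated one.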
+ */ + call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func); +} +EXPORT_SYMBOL(mini_qdisc_pair_swap); + +void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, + struct mini_Qdisc __rcu **p_miniq) +{ + miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats; + miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats; + miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats; + miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats; + miniqp->p_miniq = p_miniq; +} +EXPORT_SYMBOL(mini_qdisc_pair_init); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b52f74610dc7..d04068a97d81 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -110,7 +110,6 @@ enum hfsc_class_flags { struct hfsc_class { struct Qdisc_class_common cl_common; - unsigned int refcnt; /* usage count */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; @@ -829,28 +828,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) } } -static void -set_active(struct hfsc_class *cl, unsigned int len) -{ - if (cl->cl_flags & HFSC_RSC) - init_ed(cl, len); - if (cl->cl_flags & HFSC_FSC) - init_vf(cl, len); - -} - -static void -set_passive(struct hfsc_class *cl) -{ - if (cl->cl_flags & HFSC_RSC) - eltree_remove(cl); - - /* - * vttree is now handled in update_vf() so that update_vf(cl, 0, 0) - * needs to be called explicitly to remove a class from vttree. - */ -} - static unsigned int qdisc_peek_len(struct Qdisc *sch) { @@ -981,6 +958,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } if (cl != NULL) { + int old_flags; + if (parentid) { if (cl->cl_parent && cl->cl_parent->cl_common.classid != parentid) @@ -1001,6 +980,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_lock(sch); + old_flags = cl->cl_flags; + if (rsc != NULL) hfsc_change_rsc(cl, rsc, cur_time); if (fsc != NULL) @@ -1009,10 +990,21 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, hfsc_change_usc(cl, usc, cur_time); if (cl->qdisc->q.qlen != 0) { - if (cl->cl_flags & HFSC_RSC) - update_ed(cl, qdisc_peek_len(cl->qdisc)); - if (cl->cl_flags & HFSC_FSC) - update_vf(cl, 0, cur_time); + int len = qdisc_peek_len(cl->qdisc); + + if (cl->cl_flags & HFSC_RSC) { + if (old_flags & HFSC_RSC) + update_ed(cl, len); + else + init_ed(cl, len); + } + + if (cl->cl_flags & HFSC_FSC) { + if (old_flags & HFSC_FSC) + update_vf(cl, 0, cur_time); + else + init_vf(cl, len); + } } sch_tree_unlock(sch); @@ -1041,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); if (err) { kfree(cl); return err; @@ -1067,7 +1059,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, hfsc_change_usc(cl, usc, 0); cl->cl_common.classid = classid; - cl->refcnt = 1; cl->sched = q; cl->cl_parent = parent; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -1123,13 +1114,9 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) hfsc_purge_queue(sch, cl); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). 
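The deletion above is one instance of a conversion repeated across this series (drr, hfsc, htb, qfq, mq, mqprio and the classless qdiscs): the per-class refcnt and the cops->get()/cops->put() pair are removed in favour of a lookup-only cops->find(), and ->delete() now destroys the class directly, relying on the RTNL lock that tc_ctl_tclass already holds - exactly the invariant the removed "this shouldn't happen" comments used to assert. A reduced before/after sketch; the foo_* names and stub types are placeholders, not kernel API:

#include <stdio.h>

struct Qdisc { int dummy; };
struct foo_class { unsigned int refcnt; };

static struct foo_class a_class;

static struct foo_class *foo_find_class(struct Qdisc *sch, unsigned int classid)
{
	(void)sch;
	return classid == 1 ? &a_class : NULL;
}

/* before: cops->get() took a reference, cops->put() dropped it and
 * destroyed the class when the count hit zero
 */
static unsigned long foo_get(struct Qdisc *sch, unsigned int classid)
{
	struct foo_class *cl = foo_find_class(sch, classid);

	if (cl)
		cl->refcnt++;
	return (unsigned long)cl;
}

/* after: cops->find() is a bare lookup; class lifetime is already
 * serialized by RTNL, so no reference counting is needed
 */
static unsigned long foo_search_class(struct Qdisc *sch, unsigned int classid)
{
	return (unsigned long)foo_find_class(sch, classid);
}

int main(void)
{
	struct Qdisc sch = { 0 };

	printf("get: %#lx find: %#lx\n", foo_get(&sch, 1),
	       foo_search_class(&sch, 1));
	return 0;
}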
- */ - sch_tree_unlock(sch); + + hfsc_destroy_class(sch, cl); return 0; } @@ -1157,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -1221,30 +1209,18 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct hfsc_class *cl = (struct hfsc_class *)arg; - if (cl->qdisc->q.qlen == 0) { - update_vf(cl, 0, 0); - set_passive(cl); - } + /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0) + * needs to be called explicitly to remove a class from vttree. + */ + update_vf(cl, 0, 0); + if (cl->cl_flags & HFSC_RSC) + eltree_remove(cl); } static unsigned long -hfsc_get_class(struct Qdisc *sch, u32 classid) +hfsc_search_class(struct Qdisc *sch, u32 classid) { - struct hfsc_class *cl = hfsc_find_class(classid, sch); - - if (cl != NULL) - cl->refcnt++; - - return (unsigned long)cl; -} - -static void -hfsc_put_class(struct Qdisc *sch, unsigned long arg) -{ - struct hfsc_class *cl = (struct hfsc_class *)arg; - - if (--cl->refcnt == 0) - hfsc_destroy_class(sch, cl); + return (unsigned long)hfsc_find_class(classid, sch); } static unsigned long @@ -1418,6 +1394,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) struct tc_hfsc_qopt *qopt; int err; + qdisc_watchdog_init(&q->watchdog, sch); + if (opt == NULL || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); @@ -1428,8 +1406,11 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; + err = tcf_block_get(&q->root.block, &q->root.filter_list, sch); + if (err) + return err; + q->root.cl_common.classid = sch->handle; - q->root.refcnt = 1; q->root.sched = q; q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); @@ -1444,8 +1425,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) qdisc_class_hash_insert(&q->clhash, &q->root.cl_common); qdisc_class_hash_grow(sch, &q->clhash); - qdisc_watchdog_init(&q->watchdog, sch); - return 0; } @@ -1522,8 +1501,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch) unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -1575,7 +1556,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } if (cl->qdisc->q.qlen == 1) { - set_active(cl, qdisc_pkt_len(skb)); + unsigned int len = qdisc_pkt_len(skb); + + if (cl->cl_flags & HFSC_RSC) + init_ed(cl, len); + if (cl->cl_flags & HFSC_FSC) + init_vf(cl, len); /* * If this is the first packet, isolate the head so an eventual * head drop before the first dequeue operation has no chance @@ -1639,18 +1625,18 @@ hfsc_dequeue(struct Qdisc *sch) if (realtime) cl->cl_cumul += qdisc_pkt_len(skb); - if (cl->qdisc->q.qlen != 0) { - if (cl->cl_flags & HFSC_RSC) { + if (cl->cl_flags & HFSC_RSC) { + if (cl->qdisc->q.qlen != 0) { /* update ed */ next_len = qdisc_peek_len(cl->qdisc); if (realtime) update_ed(cl, next_len); else update_d(cl, next_len); + } else { + /* the class becomes passive */ + eltree_remove(cl); } - } else { - /* the class becomes passive */ - set_passive(cl); } qdisc_bstats_update(sch, skb); @@ -1666,8 +1652,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = { .graft = hfsc_graft_class, .leaf 
= hfsc_class_leaf, .qlen_notify = hfsc_qlen_notify, - .get = hfsc_get_class, - .put = hfsc_put_class, + .find = hfsc_search_class, .bind_tcf = hfsc_bind_tcf, .unbind_tcf = hfsc_unbind_tcf, .tcf_block = hfsc_tcf_block, diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 51d3ba682af9..73a53c08091b 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -477,6 +477,9 @@ static void hhf_destroy(struct Qdisc *sch) kvfree(q->hhf_valid_bits[i]); } + if (!q->hh_flows) + return; + for (i = 0; i < HH_FLOWS_CNT; i++) { struct hh_flow_state *flow, *next; struct list_head *head = &q->hh_flows[i]; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 203286ab4427..fa0380730ff0 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -107,7 +107,6 @@ struct htb_class { struct tcf_proto __rcu *filter_list; /* class attached filters */ struct tcf_block *block; int filter_cnt; - int refcnt; /* usage count of this class */ int level; /* our level (see above) */ unsigned int children; @@ -143,6 +142,7 @@ struct htb_class { struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ unsigned int drops ____cacheline_aligned_in_smp; + unsigned int overlimits; }; struct htb_level { @@ -193,6 +193,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) return container_of(clc, struct htb_class, common); } +static unsigned long htb_search(struct Qdisc *sch, u32 handle) +{ + return (unsigned long)htb_find(handle, sch); +} /** * htb_classify - classify a packet into class * @@ -240,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -530,6 +535,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) if (new_mode == cl->cmode) return; + if (new_mode == HTB_CANT_SEND) + cl->overlimits++; + if (cl->prio_activity) { /* not necessary: speed optimization */ if (cl->cmode != HTB_CANT_SEND) htb_deactivate_prios(q, cl); @@ -1017,10 +1025,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) int err; int i; + qdisc_watchdog_init(&q->watchdog, sch); + INIT_WORK(&q->work, htb_work_func); + if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -1041,8 +1052,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); - qdisc_watchdog_init(&q->watchdog, sch); - INIT_WORK(&q->work, htb_work_func); qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) @@ -1139,6 +1148,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) struct htb_class *cl = (struct htb_class *)arg; struct gnet_stats_queue qs = { .drops = cl->drops, + .overlimits = cl->overlimits, }; __u32 qlen = 0; @@ -1186,16 +1196,7 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (cl->un.leaf.q->q.qlen == 0) - htb_deactivate(qdisc_priv(sch), cl); -} - -static unsigned long htb_get(struct Qdisc *sch, u32 classid) -{ - struct htb_class *cl = htb_find(classid, sch); - if (cl) - cl->refcnt++; - return (unsigned long)cl; + htb_deactivate(qdisc_priv(sch), cl); } static inline int htb_parent_last_child(struct htb_class *cl) @@ -1258,8 +1259,10 @@ static void htb_destroy(struct Qdisc *sch) tcf_block_put(q->block); for (i = 0; i 
< q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], @@ -1315,22 +1318,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (last_child) htb_parent_to_leaf(q, cl, new_q); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ - sch_tree_unlock(sch); - return 0; -} - -static void htb_put(struct Qdisc *sch, unsigned long arg) -{ - struct htb_class *cl = (struct htb_class *)arg; - if (--cl->refcnt == 0) - htb_destroy_class(sch, cl); + htb_destroy_class(sch, cl); + return 0; } static int htb_change_class(struct Qdisc *sch, u32 classid, @@ -1403,7 +1394,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); if (err) { kfree(cl); goto failure; @@ -1421,7 +1412,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } } - cl->refcnt = 1; cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); RB_CLEAR_NODE(&cl->pq_node); @@ -1597,8 +1587,7 @@ static const struct Qdisc_class_ops htb_class_ops = { .graft = htb_graft, .leaf = htb_leaf, .qlen_notify = htb_qlen_notify, - .get = htb_get, - .put = htb_put, + .find = htb_search, .change = htb_change_class, .delete = htb_delete, .walk = htb_walk, diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index d8a9bebcab90..5ecc38f35d47 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -20,6 +20,8 @@ struct ingress_sched_data { struct tcf_block *block; + struct tcf_block_ext_info block_info; + struct mini_Qdisc_pair miniqp; }; static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) @@ -27,23 +29,18 @@ static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) return NULL; } -static unsigned long ingress_get(struct Qdisc *sch, u32 classid) +static unsigned long ingress_find(struct Qdisc *sch, u32 classid) { return TC_H_MIN(classid) + 1; } -static bool ingress_cl_offload(u32 classid) -{ - return true; -} - static unsigned long ingress_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - return ingress_get(sch, classid); + return ingress_find(sch, classid); } -static void ingress_put(struct Qdisc *sch, unsigned long cl) +static void ingress_unbind_filter(struct Qdisc *sch, unsigned long cl) { } @@ -58,13 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) return q->block; } +static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv) +{ + struct mini_Qdisc_pair *miniqp = priv; + + mini_qdisc_pair_swap(miniqp, tp_head); +} + static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); int err; - err = tcf_block_get(&q->block, &dev->ingress_cl_list); + mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); + + q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + q->block_info.chain_head_change = clsact_chain_head_change; + q->block_info.chain_head_change_priv = &q->miniqp; + + err = tcf_block_get_ext(&q->block, sch, &q->block_info); if (err) return err; @@ -78,7 +88,7 @@ static void 
ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); - tcf_block_put(q->block); + tcf_block_put_ext(q->block, sch, &q->block_info); net_dec_ingress_queue(); } @@ -99,13 +109,11 @@ nla_put_failure: static const struct Qdisc_class_ops ingress_class_ops = { .leaf = ingress_leaf, - .get = ingress_get, - .put = ingress_put, + .find = ingress_find, .walk = ingress_walk, .tcf_block = ingress_tcf_block, - .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, - .unbind_tcf = ingress_put, + .unbind_tcf = ingress_unbind_filter, }; static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { @@ -121,9 +129,13 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { struct clsact_sched_data { struct tcf_block *ingress_block; struct tcf_block *egress_block; + struct tcf_block_ext_info ingress_block_info; + struct tcf_block_ext_info egress_block_info; + struct mini_Qdisc_pair miniqp_ingress; + struct mini_Qdisc_pair miniqp_egress; }; -static unsigned long clsact_get(struct Qdisc *sch, u32 classid) +static unsigned long clsact_find(struct Qdisc *sch, u32 classid) { switch (TC_H_MIN(classid)) { case TC_H_MIN(TC_H_MIN_INGRESS): @@ -134,15 +146,10 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid) } } -static bool clsact_cl_offload(u32 classid) -{ - return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS); -} - static unsigned long clsact_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - return clsact_get(sch, classid); + return clsact_find(sch, classid); } static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl) @@ -165,13 +172,25 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; - err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list); + mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); + + q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + q->ingress_block_info.chain_head_change = clsact_chain_head_change; + q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress; + + err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info); if (err) return err; - err = tcf_block_get(&q->egress_block, &dev->egress_cl_list); + mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); + + q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS; + q->egress_block_info.chain_head_change = clsact_chain_head_change; + q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; + + err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); if (err) - return err; + goto err_egress_block_get; net_inc_ingress_queue(); net_inc_egress_queue(); @@ -179,14 +198,18 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_CPUSTATS; return 0; + +err_egress_block_get: + tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); + return err; } static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); - tcf_block_put(q->egress_block); - tcf_block_put(q->ingress_block); + tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); + tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); net_dec_ingress_queue(); net_dec_egress_queue(); @@ -194,13 +217,11 @@ static void clsact_destroy(struct Qdisc *sch) static const struct Qdisc_class_ops clsact_class_ops = { .leaf = ingress_leaf, - .get = clsact_get, - .put = ingress_put, + .find = clsact_find, .walk = ingress_walk, 
.tcf_block = clsact_tcf_block, - .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, - .unbind_tcf = ingress_put, + .unbind_tcf = ingress_unbind_filter, }; static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index cadfdd4f1e52..213b586a06a0 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -130,15 +130,7 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl) static struct netdev_queue *mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm) { - unsigned int ntx = TC_H_MIN(tcm->tcm_parent); - struct netdev_queue *dev_queue = mq_queue_get(sch, ntx); - - if (!dev_queue) { - struct net_device *dev = qdisc_dev(sch); - - return netdev_get_tx_queue(dev, 0); - } - return dev_queue; + return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); } static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, @@ -165,7 +157,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) return dev_queue->qdisc_sleeping; } -static unsigned long mq_get(struct Qdisc *sch, u32 classid) +static unsigned long mq_find(struct Qdisc *sch, u32 classid) { unsigned int ntx = TC_H_MIN(classid); @@ -174,10 +166,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid) return ntx; } -static void mq_put(struct Qdisc *sch, unsigned long cl) -{ -} - static int mq_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { @@ -223,8 +211,7 @@ static const struct Qdisc_class_ops mq_class_ops = { .select_queue = mq_select_queue, .graft = mq_graft, .leaf = mq_leaf, - .get = mq_get, - .put = mq_put, + .find = mq_find, .walk = mq_walk, .dump = mq_dump_class, .dump_stats = mq_dump_class_stats, diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index e0c02725cd48..b85885a9d8a1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -18,10 +18,16 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/sch_generic.h> +#include <net/pkt_cls.h> struct mqprio_sched { struct Qdisc **qdiscs; + u16 mode; + u16 shaper; int hw_offload; + u32 flags; + u64 min_rate[TC_QOPT_MAX_QUEUE]; + u64 max_rate[TC_QOPT_MAX_QUEUE]; }; static void mqprio_destroy(struct Qdisc *sch) @@ -39,11 +45,18 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { - struct tc_mqprio_qopt offload = { 0 }; - struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, - { .mqprio = &offload } }; - - dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc); + struct tc_mqprio_qopt_offload mqprio = { { 0 } }; + + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + case TC_MQPRIO_MODE_CHANNEL: + dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_MQPRIO, + &mqprio); + break; + default: + return; + } } else { netdev_set_num_tc(dev, 0); } @@ -99,6 +112,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) return 0; } +static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = { + [TCA_MQPRIO_MODE] = { .len = sizeof(u16) }, + [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) }, + [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED }, + [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED }, +}; + +static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy, int len) +{ + int nested_len = nla_len(nla) - NLA_ALIGN(len); + + if (nested_len >= nla_attr_size(0)) + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); + + memset(tb, 
0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) { struct net_device *dev = qdisc_dev(sch); @@ -107,6 +140,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; + struct nlattr *tb[TCA_MQPRIO_MAX + 1]; + struct nlattr *attr; + int rem; + int len; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); @@ -117,6 +154,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; + /* make certain can allocate enough classids to handle queues */ + if (dev->num_tx_queues >= TC_H_MIN_PRIORITY) + return -ENOMEM; + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; @@ -124,6 +165,59 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (mqprio_parse_opt(dev, qopt)) return -EINVAL; + len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); + if (len > 0) { + err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, + sizeof(*qopt)); + if (err < 0) + return err; + + if (!qopt->hw) + return -EINVAL; + + if (tb[TCA_MQPRIO_MODE]) { + priv->flags |= TC_MQPRIO_F_MODE; + priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); + } + + if (tb[TCA_MQPRIO_SHAPER]) { + priv->flags |= TC_MQPRIO_F_SHAPER; + priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); + } + + if (tb[TCA_MQPRIO_MIN_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return -EINVAL; + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) + return -EINVAL; + if (i >= qopt->num_tc) + break; + priv->min_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MIN_RATE; + } + + if (tb[TCA_MQPRIO_MAX_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return -EINVAL; + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) + return -EINVAL; + if (i >= qopt->num_tc) + break; + priv->max_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MAX_RATE; + } + } + /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); @@ -148,16 +242,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { - struct tc_mqprio_qopt offload = *qopt; - struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, - { .mqprio = &offload } }; + struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt}; - err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, - 0, 0, &tc); + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + if (priv->shaper != TC_MQPRIO_SHAPER_DCB) + return -EINVAL; + break; + case TC_MQPRIO_MODE_CHANNEL: + mqprio.flags = priv->flags; + if (priv->flags & TC_MQPRIO_F_MODE) + mqprio.mode = priv->mode; + if (priv->flags & TC_MQPRIO_F_SHAPER) + mqprio.shaper = priv->shaper; + if (priv->flags & TC_MQPRIO_F_MIN_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.min_rate[i] = priv->min_rate[i]; + if (priv->flags & TC_MQPRIO_F_MAX_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.max_rate[i] = priv->max_rate[i]; + break; + default: + return -EINVAL; + } + err = dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_MQPRIO, + &mqprio); if (err) return err; - priv->hw_offload = offload.hw; + priv->hw_offload = mqprio.qopt.hw; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; 
i < qopt->num_tc; i++) @@ -197,7 +311,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, unsigned long cl) { struct net_device *dev = qdisc_dev(sch); - unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); + unsigned long ntx = cl - 1; if (ntx >= dev->num_tx_queues) return NULL; @@ -227,11 +341,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, return 0; } +static int dump_rates(struct mqprio_sched *priv, + struct tc_mqprio_qopt *opt, struct sk_buff *skb) +{ + struct nlattr *nest; + int i; + + if (priv->flags & TC_MQPRIO_F_MIN_RATE) { + nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < opt->num_tc; i++) { + if (nla_put(skb, TCA_MQPRIO_MIN_RATE64, + sizeof(priv->min_rate[i]), + &priv->min_rate[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + } + + if (priv->flags & TC_MQPRIO_F_MAX_RATE) { + nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < opt->num_tc; i++) { + if (nla_put(skb, TCA_MQPRIO_MAX_RATE64, + sizeof(priv->max_rate[i]), + &priv->max_rate[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + } + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb); struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; unsigned int i; @@ -262,12 +416,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) opt.offset[i] = dev->tc_to_txq[i].offset; } - if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) + if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt)) + goto nla_put_failure; + + if ((priv->flags & TC_MQPRIO_F_MODE) && + nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode)) goto nla_put_failure; - return skb->len; + if ((priv->flags & TC_MQPRIO_F_SHAPER) && + nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper)) + goto nla_put_failure; + + if ((priv->flags & TC_MQPRIO_F_MIN_RATE || + priv->flags & TC_MQPRIO_F_MAX_RATE) && + (dump_rates(priv, &opt, skb) != 0)) + goto nla_put_failure; + + return nla_nest_end(skb, nla); nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_trim(skb, nla); return -1; } @@ -281,47 +448,40 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) return dev_queue->qdisc_sleeping; } -static unsigned long mqprio_get(struct Qdisc *sch, u32 classid) +static unsigned long mqprio_find(struct Qdisc *sch, u32 classid) { struct net_device *dev = qdisc_dev(sch); unsigned int ntx = TC_H_MIN(classid); - if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) - return 0; - return ntx; -} + /* There are essentially two regions here that have valid classid + * values. The first region will have a classid value of 1 through + * num_tx_queues. All of these are backed by actual Qdiscs. + */ + if (ntx < TC_H_MIN_PRIORITY) + return (ntx <= dev->num_tx_queues) ? ntx : 0; -static void mqprio_put(struct Qdisc *sch, unsigned long cl) -{ + /* The second region represents the hardware traffic classes. These + * are represented by classid values of TC_H_MIN_PRIORITY through + * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1 + */ + return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? 
ntx : 0; } static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { - struct net_device *dev = qdisc_dev(sch); + if (cl < TC_H_MIN_PRIORITY) { + struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); + struct net_device *dev = qdisc_dev(sch); + int tc = netdev_txq_to_tc(dev, cl - 1); - if (cl <= netdev_get_num_tc(dev)) { + tcm->tcm_parent = (tc < 0) ? 0 : + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(tc + TC_H_MIN_PRIORITY)); + tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + } else { tcm->tcm_parent = TC_H_ROOT; tcm->tcm_info = 0; - } else { - int i; - struct netdev_queue *dev_queue; - - dev_queue = mqprio_queue_get(sch, cl); - tcm->tcm_parent = 0; - for (i = 0; i < netdev_get_num_tc(dev); i++) { - struct netdev_tc_txq tc = dev->tc_to_txq[i]; - int q_idx = cl - netdev_get_num_tc(dev); - - if (q_idx > tc.offset && - q_idx <= tc.offset + tc.count) { - tcm->tcm_parent = - TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(i + 1)); - break; - } - } - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; } tcm->tcm_handle |= TC_H_MIN(cl); return 0; @@ -332,15 +492,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __releases(d->lock) __acquires(d->lock) { - struct net_device *dev = qdisc_dev(sch); - - if (cl <= netdev_get_num_tc(dev)) { + if (cl >= TC_H_MIN_PRIORITY) { int i; __u32 qlen = 0; struct Qdisc *qdisc; struct gnet_stats_queue qstats = {0}; struct gnet_stats_basic_packed bstats = {0}; - struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; + struct net_device *dev = qdisc_dev(sch); + struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; /* Drop lock here it will be reclaimed before touching * statistics this is required because the d->lock we @@ -393,25 +552,44 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* Walk hierarchy with a virtual class per tc */ arg->count = arg->skip; - for (ntx = arg->skip; - ntx < dev->num_tx_queues + netdev_get_num_tc(dev); - ntx++) { + for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) { + if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + + /* Pad the values and skip over unused traffic classes */ + if (ntx < TC_MAX_QUEUE) { + arg->count = TC_MAX_QUEUE; + ntx = TC_MAX_QUEUE; + } + + /* Reset offset, sort out remaining per-queue qdiscs */ + for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) { if (arg->fn(sch, ntx + 1, arg) < 0) { arg->stop = 1; - break; + return; } arg->count++; } } +static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch, + struct tcmsg *tcm) +{ + return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); +} + static const struct Qdisc_class_ops mqprio_class_ops = { .graft = mqprio_graft, .leaf = mqprio_leaf, - .get = mqprio_get, - .put = mqprio_put, + .find = mqprio_find, .walk = mqprio_walk, .dump = mqprio_dump_class, .dump_stats = mqprio_dump_class_stats, + .select_queue = mqprio_select_queue, }; static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index f143b7bbaa0d..012216386c0b 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -245,7 +246,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = 
tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -257,12 +258,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->max_bands; i++) q->queues[i] = &noop_qdisc; - err = multiq_tune(sch, opt); - - if (err) - kfree(q->queues); - - return err; + return multiq_tune(sch, opt); } static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -306,7 +302,7 @@ multiq_leaf(struct Qdisc *sch, unsigned long arg) return q->queues[band]; } -static unsigned long multiq_get(struct Qdisc *sch, u32 classid) +static unsigned long multiq_find(struct Qdisc *sch, u32 classid) { struct multiq_sched_data *q = qdisc_priv(sch); unsigned long band = TC_H_MIN(classid); @@ -319,11 +315,11 @@ static unsigned long multiq_get(struct Qdisc *sch, u32 classid) static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { - return multiq_get(sch, classid); + return multiq_find(sch, classid); } -static void multiq_put(struct Qdisc *q, unsigned long cl) +static void multiq_unbind(struct Qdisc *q, unsigned long cl) { } @@ -385,12 +381,11 @@ static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl) static const struct Qdisc_class_ops multiq_class_ops = { .graft = multiq_graft, .leaf = multiq_leaf, - .get = multiq_get, - .put = multiq_put, + .find = multiq_find, .walk = multiq_walk, .tcf_block = multiq_tcf_block, .bind_tcf = multiq_bind, - .unbind_tcf = multiq_put, + .unbind_tcf = multiq_unbind, .dump = multiq_dump_class, .dump_stats = multiq_dump_class_stats, }; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1b3dd6190e93..dd70924cbcdf 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -77,8 +77,8 @@ struct netem_sched_data { struct qdisc_watchdog watchdog; - psched_tdiff_t latency; - psched_tdiff_t jitter; + s64 latency; + s64 jitter; u32 loss; u32 ecn; @@ -135,6 +135,13 @@ struct netem_sched_data { u32 a5; /* p23 used only in 4-states */ } clg; + struct tc_netem_slot slot_config; + struct slotstate { + u64 slot_next; + s32 packets_left; + s32 bytes_left; + } slot; + }; /* Time stamp put into socket buffer control block @@ -145,16 +152,9 @@ struct netem_sched_data { * we save skb->tstamp value in skb->cb[] before destroying it. */ struct netem_skb_cb { - psched_time_t time_to_send; - ktime_t tstamp_save; + u64 time_to_send; }; - -static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) -{ - return rb_entry(rb, struct sk_buff, rbnode); -} - static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { /* we assume we can use skb next/prev/tstamp as storage for rb_node */ @@ -312,11 +312,11 @@ static bool loss_event(struct netem_sched_data *q) * std deviation sigma. Uses table lookup to approximate the desired * distribution, and a uniformly-distributed pseudo-random source. 
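tabledist(), whose body follows, drives every randomized delay in netem: with no distribution table loaded it degenerates to a uniform draw from [mu - sigma, mu + sigma]; with a table it scales a signed 16-bit sample around mu. The hunk also widens mu from 32-bit psched ticks to s64 nanoseconds, escaping the coarse tick granularity and limited range of the old representation (the companion TCA_NETEM_LATENCY64/JITTER64 attributes appear later in the patch). A runnable sketch of just the uniform branch, with invented values:

#include <stdio.h>
#include <stdint.h>

/* Uniform branch of netem's tabledist(): a draw from
 * [mu - sigma, mu + sigma]; mu/sigma in ns, rnd stands in for the
 * correlated RNG (get_crandom() in the kernel).
 */
static int64_t tabledist_uniform(int64_t mu, int32_t sigma, uint32_t rnd)
{
	if (sigma == 0)
		return mu;
	/* cast before subtracting so the offset may go negative */
	return (int64_t)(rnd % (2 * (uint32_t)sigma)) - sigma + mu;
}

int main(void)
{
	int64_t mu = 100000000;		/* 100 ms mean latency */
	int32_t sigma = 10000000;	/* +/- 10 ms jitter */
	uint32_t rnd = 1234567;

	/* 1234567 % 20000000 - 10000000 + 100000000 = 91234567 ns */
	printf("delay = %lld ns\n",
	       (long long)tabledist_uniform(mu, sigma, rnd));
	return 0;
}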
*/ -static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, - struct crndstate *state, - const struct disttable *dist) +static s64 tabledist(s64 mu, s32 sigma, + struct crndstate *state, + const struct disttable *dist) { - psched_tdiff_t x; + s64 x; long t; u32 rnd; @@ -327,7 +327,7 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, /* default uniform distribution */ if (dist == NULL) - return (rnd % (2*sigma)) - sigma + mu; + return (rnd % (2 * sigma)) - sigma + mu; t = dist->table[rnd % dist->size]; x = (sigma % NETEM_DIST_SCALE) * t; @@ -339,10 +339,8 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } -static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) +static u64 packet_time_ns(u64 len, const struct netem_sched_data *q) { - u64 ticks; - len += q->packet_overhead; if (q->cell_size) { @@ -353,21 +351,19 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche len = cells * (q->cell_size + q->cell_overhead); } - ticks = (u64)len * NSEC_PER_SEC; - - do_div(ticks, q->rate); - return PSCHED_NS2TICKS(ticks); + return div64_u64(len * NSEC_PER_SEC, q->rate); } static void tfifo_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - struct rb_node *p; + struct rb_node *p = rb_first(&q->t_root); - while ((p = rb_first(&q->t_root))) { - struct sk_buff *skb = netem_rb_to_skb(p); + while (p) { + struct sk_buff *skb = rb_to_skb(p); - rb_erase(p, &q->t_root); + p = rb_next(p); + rb_erase(&skb->rbnode, &q->t_root); rtnl_kfree_skbs(skb, skb); } } @@ -375,14 +371,14 @@ static void tfifo_reset(struct Qdisc *sch) static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; + u64 tnext = netem_skb_cb(nskb)->time_to_send; struct rb_node **p = &q->t_root.rb_node, *parent = NULL; while (*p) { struct sk_buff *skb; parent = *p; - skb = netem_rb_to_skb(parent); + skb = rb_to_skb(parent); if (tnext >= netem_skb_cb(skb)->time_to_send) p = &parent->rb_right; else @@ -521,13 +517,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->gap == 0 || /* not doing reordering */ q->counter < q->gap - 1 || /* inside last reordering gap */ q->reorder < get_crandom(&q->reorder_cor)) { - psched_time_t now; - psched_tdiff_t delay; + u64 now; + s64 delay; delay = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); - now = psched_get_time(); + now = ktime_get_ns(); if (q->rate) { struct netem_skb_cb *last = NULL; @@ -538,7 +534,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *t_skb; struct netem_skb_cb *t_last; - t_skb = netem_rb_to_skb(rb_last(&q->t_root)); + t_skb = skb_rb_last(&q->t_root); t_last = netem_skb_cb(t_skb); if (!last || t_last->time_to_send > last->time_to_send) { @@ -553,15 +549,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * from delay. 
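When a rate is configured, netem emulates serialization on the wire: each packet is stamped to depart after the previous packet's departure plus its own transmission time, which packet_time_ns() above computes as len * NSEC_PER_SEC / rate once packet_overhead and optional cell-size rounding have been applied. A quick sanity check of that arithmetic; the frame size and rate are illustrative:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Wire time of 'len' bytes at 'rate' bytes/s, as in packet_time_ns();
 * the overhead/cell handling is omitted for brevity.
 */
static uint64_t packet_time_ns(uint64_t len, uint64_t rate)
{
	return rate ? len * NSEC_PER_SEC / rate : 0;
}

int main(void)
{
	/* a 1500-byte frame at 12500000 bytes/s (100 Mbit/s) occupies
	 * the link for 120000 ns (120 us)
	 */
	printf("%llu ns\n",
	       (unsigned long long)packet_time_ns(1500, 12500000));
	return 0;
}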
*/ delay -= last->time_to_send - now; - delay = max_t(psched_tdiff_t, 0, delay); + delay = max_t(s64, 0, delay); now = last->time_to_send; } - delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); + delay += packet_time_ns(qdisc_pkt_len(skb), q); } cb->time_to_send = now + delay; - cb->tstamp_save = skb->tstamp; ++q->counter; tfifo_enqueue(skb, sch); } else { @@ -569,7 +564,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * Do re-ordering by putting one out of N packets at the front * of the queue. */ - cb->time_to_send = psched_get_time(); + cb->time_to_send = ktime_get_ns(); q->counter = 0; netem_enqueue_skb_head(&sch->q, skb); @@ -600,6 +595,20 @@ finish_segs: return NET_XMIT_SUCCESS; } +/* Delay the next round with a new future slot with a + * correct number of bytes and packets. + */ + +static void get_slot_next(struct netem_sched_data *q, u64 now) +{ + q->slot.slot_next = now + q->slot_config.min_delay + + (prandom_u32() * + (q->slot_config.max_delay - + q->slot_config.min_delay) >> 32); + q->slot.packets_left = q->slot_config.max_packets; + q->slot.bytes_left = q->slot_config.max_bytes; +} + static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -616,20 +625,26 @@ deliver: } p = rb_first(&q->t_root); if (p) { - psched_time_t time_to_send; + u64 time_to_send; + u64 now = ktime_get_ns(); - skb = netem_rb_to_skb(p); + skb = rb_to_skb(p); /* if more time remaining? */ time_to_send = netem_skb_cb(skb)->time_to_send; - if (time_to_send <= psched_get_time()) { - rb_erase(p, &q->t_root); + if (q->slot.slot_next && q->slot.slot_next < time_to_send) + get_slot_next(q, now); + if (time_to_send <= now && q->slot.slot_next <= now) { + rb_erase(p, &q->t_root); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; skb->prev = NULL; - skb->tstamp = netem_skb_cb(skb)->tstamp_save; + /* skb->dev shares skb->rbnode area, + * we need to restore its value. + */ + skb->dev = qdisc_dev(sch); #ifdef CONFIG_NET_CLS_ACT /* @@ -640,6 +655,14 @@ deliver: skb->tstamp = 0; #endif + if (q->slot.slot_next) { + q->slot.packets_left--; + q->slot.bytes_left -= qdisc_pkt_len(skb); + if (q->slot.packets_left <= 0 || + q->slot.bytes_left <= 0) + get_slot_next(q, now); + } + if (q->qdisc) { unsigned int pkt_len = qdisc_pkt_len(skb); struct sk_buff *to_free = NULL; @@ -663,7 +686,10 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule(&q->watchdog, time_to_send); + + qdisc_watchdog_schedule_ns(&q->watchdog, + max(time_to_send, + q->slot.slot_next)); } if (q->qdisc) { @@ -694,6 +720,7 @@ static void dist_free(struct disttable *d) * Distribution data is a variable size payload containing * signed 16 bit values. 
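The slot machinery added above models shared-medium links (e.g. WiFi aggregation or DOCSIS grants): nothing is delivered before slot.slot_next; once a slot opens, at most max_packets packets and max_bytes bytes may leave before get_slot_next() draws the start of the following slot uniformly from [min_delay, max_delay] using the multiply-shift trick (rand32 * range) >> 32. A sketch of that draw, with invented timings:

#include <stdio.h>
#include <stdint.h>

/* Next slot start, as in get_slot_next(): 'now' plus min_delay plus a
 * uniform sample of the window; rnd stands in for prandom_u32().
 */
static uint64_t slot_next(uint64_t now, uint64_t min_delay,
			  uint64_t max_delay, uint32_t rnd)
{
	return now + min_delay +
	       (((uint64_t)rnd * (max_delay - min_delay)) >> 32);
}

int main(void)
{
	/* slots open between 1 ms and 3 ms from now; rnd = 2^31 picks
	 * the midpoint, so the next slot starts at 2000000 ns (2 ms)
	 */
	printf("%llu ns\n", (unsigned long long)
	       slot_next(0, 1000000, 3000000, 0x80000000u));
	return 0;
}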
*/ + static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); @@ -724,6 +751,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) return 0; } +static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) +{ + const struct tc_netem_slot *c = nla_data(attr); + + q->slot_config = *c; + if (q->slot_config.max_packets == 0) + q->slot_config.max_packets = INT_MAX; + if (q->slot_config.max_bytes == 0) + q->slot_config.max_bytes = INT_MAX; + q->slot.packets_left = q->slot_config.max_packets; + q->slot.bytes_left = q->slot_config.max_bytes; + if (q->slot_config.min_delay | q->slot_config.max_delay) + q->slot.slot_next = ktime_get_ns(); + else + q->slot.slot_next = 0; +} + static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_corr *c = nla_data(attr); @@ -825,6 +869,9 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, [TCA_NETEM_ECN] = { .type = NLA_U32 }, [TCA_NETEM_RATE64] = { .type = NLA_U64 }, + [TCA_NETEM_LATENCY64] = { .type = NLA_S64 }, + [TCA_NETEM_JITTER64] = { .type = NLA_S64 }, + [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -892,8 +939,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) sch->limit = qopt->limit; - q->latency = qopt->latency; - q->jitter = qopt->jitter; + q->latency = PSCHED_TICKS2NS(qopt->latency); + q->jitter = PSCHED_TICKS2NS(qopt->jitter); q->limit = qopt->limit; q->gap = qopt->gap; q->counter = 0; @@ -922,9 +969,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) q->rate = max_t(u64, q->rate, nla_get_u64(tb[TCA_NETEM_RATE64])); + if (tb[TCA_NETEM_LATENCY64]) + q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]); + + if (tb[TCA_NETEM_JITTER64]) + q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]); + if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); + if (tb[TCA_NETEM_SLOT]) + get_slot(q, tb[TCA_NETEM_SLOT]); + return ret; } @@ -933,11 +989,11 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) struct netem_sched_data *q = qdisc_priv(sch); int ret; + qdisc_watchdog_init(&q->watchdog, sch); + if (!opt) return -EINVAL; - qdisc_watchdog_init(&q->watchdog, sch); - q->loss_model = CLG_RANDOM; ret = netem_change(sch, opt); if (ret) @@ -1014,9 +1070,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_reorder reorder; struct tc_netem_corrupt corrupt; struct tc_netem_rate rate; + struct tc_netem_slot slot; - qopt.latency = q->latency; - qopt.jitter = q->jitter; + qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), + UINT_MAX); + qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), + UINT_MAX); qopt.limit = q->limit; qopt.loss = q->loss; qopt.gap = q->gap; @@ -1024,6 +1083,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) goto nla_put_failure; + if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency)) + goto nla_put_failure; + + if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter)) + goto nla_put_failure; + cor.delay_corr = q->delay_cor.rho; cor.loss_corr = q->loss_cor.rho; cor.dup_corr = q->dup_cor.rho; @@ -1060,6 +1125,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (dump_loss_model(q, skb) != 0) goto nla_put_failure; + if 
(q->slot_config.min_delay | q->slot_config.max_delay) { + slot = q->slot_config; + if (slot.max_packets == INT_MAX) + slot.max_packets = 0; + if (slot.max_bytes == INT_MAX) + slot.max_bytes = 0; + if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot)) + goto nla_put_failure; + } + return nla_nest_end(skb, nla); nla_put_failure: @@ -1096,15 +1171,11 @@ static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) return q->qdisc; } -static unsigned long netem_get(struct Qdisc *sch, u32 classid) +static unsigned long netem_find(struct Qdisc *sch, u32 classid) { return 1; } -static void netem_put(struct Qdisc *sch, unsigned long arg) -{ -} - static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { @@ -1120,8 +1191,7 @@ static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) static const struct Qdisc_class_ops netem_class_ops = { .graft = netem_graft, .leaf = netem_leaf, - .get = netem_get, - .put = netem_put, + .find = netem_find, .walk = netem_walk, .dump = netem_dump_class, }; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 6c2791d6102d..776c694c77c7 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -74,6 +74,7 @@ struct pie_sched_data { struct pie_vars vars; struct pie_stats stats; struct timer_list adapt_timer; + struct Qdisc *sch; }; static void pie_params_init(struct pie_params *params) @@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch) pie_vars_init(&q->vars); } -static void pie_timer(unsigned long arg) +static void pie_timer(struct timer_list *t) { - struct Qdisc *sch = (struct Qdisc *)arg; - struct pie_sched_data *q = qdisc_priv(sch); + struct pie_sched_data *q = from_timer(q, t, adapt_timer); + struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) pie_vars_init(&q->vars); sch->limit = q->params.limit; - setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); + q->sch = sch; + timer_setup(&q->adapt_timer, pie_timer, 0); if (opt) { int err = pie_change(sch, opt); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index e3e364cc9a70..2c79559a0d31 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -80,7 +81,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return ret; } #endif @@ -212,7 +213,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -260,7 +261,7 @@ prio_leaf(struct Qdisc *sch, unsigned long arg) return q->queues[band]; } -static unsigned long prio_get(struct Qdisc *sch, u32 classid) +static unsigned long prio_find(struct Qdisc *sch, u32 classid) { struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = TC_H_MIN(classid); @@ -272,11 +273,11 @@ static unsigned long prio_get(struct Qdisc *sch, u32 classid) static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { - return prio_get(sch, classid); + return prio_find(sch, classid); } -static void prio_put(struct 
Qdisc *q, unsigned long cl) +static void prio_unbind(struct Qdisc *q, unsigned long cl) { } @@ -338,12 +339,11 @@ static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl) static const struct Qdisc_class_ops prio_class_ops = { .graft = prio_graft, .leaf = prio_leaf, - .get = prio_get, - .put = prio_put, + .find = prio_find, .walk = prio_walk, .tcf_block = prio_tcf_block, .bind_tcf = prio_bind, - .unbind_tcf = prio_put, + .unbind_tcf = prio_unbind, .dump = prio_dump_class, .dump_stats = prio_dump_class_stats, }; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 0e16dfda0bd7..6962b37a3ad3 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -132,7 +132,6 @@ struct qfq_aggregate; struct qfq_class { struct Qdisc_class_common common; - unsigned int refcnt; unsigned int filter_cnt; struct gnet_stats_basic_packed bstats; @@ -477,7 +476,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - cl->refcnt = 1; cl->common.classid = classid; cl->deficit = lmax; @@ -555,32 +553,15 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) qfq_purge_queue(cl); qdisc_class_hash_remove(&q->clhash, &cl->common); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ - sch_tree_unlock(sch); - return 0; -} - -static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid) -{ - struct qfq_class *cl = qfq_find_class(sch, classid); - if (cl != NULL) - cl->refcnt++; - - return (unsigned long)cl; + qfq_destroy_class(sch, cl); + return 0; } -static void qfq_put_class(struct Qdisc *sch, unsigned long arg) +static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid) { - struct qfq_class *cl = (struct qfq_class *)arg; - - if (--cl->refcnt == 0) - qfq_destroy_class(sch, cl); + return (unsigned long)qfq_find_class(sch, classid); } static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl) @@ -728,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -1234,7 +1216,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == NULL) { if (err & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return err; } pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); @@ -1428,8 +1410,7 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; - if (cl->qdisc->q.qlen == 0) - qfq_deactivate_class(q, cl); + qfq_deactivate_class(q, cl); } static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) @@ -1439,7 +1420,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) int i, j, err; u32 max_cl_shift, maxbudg_shift, max_classes; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -1511,8 +1492,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) static const struct Qdisc_class_ops qfq_class_ops = { .change = qfq_change_class, .delete = qfq_delete_class, - .get = qfq_get_class, - .put = qfq_put_class, + .find = qfq_search_class, .tcf_block = qfq_tcf_block, .bind_tcf = qfq_bind_tcf, .unbind_tcf = qfq_unbind_tcf, diff --git a/net/sched/sch_red.c 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 11292adce412..7f8ea9e297c3 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -19,6 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <net/inet_ecn.h>
 #include <net/red.h>
 
@@ -40,6 +41,7 @@ struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
 	struct timer_list	adapt_timer;
+	struct Qdisc		*sch;
 	struct red_parms	parms;
 	struct red_vars		vars;
 	struct red_stats	stats;
@@ -147,11 +149,37 @@ static void red_reset(struct Qdisc *sch)
 	red_restart(&q->vars);
 }
 
+static int red_offload(struct Qdisc *sch, bool enable)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_red_qopt_offload opt = {
+		.handle = sch->handle,
+		.parent = sch->parent,
+	};
+
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	if (enable) {
+		opt.command = TC_RED_REPLACE;
+		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
+		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
+		opt.set.probability = q->parms.max_P;
+		opt.set.is_ecn = red_use_ecn(q);
+	} else {
+		opt.command = TC_RED_DESTROY;
+	}
+
+	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+}
+
 static void red_destroy(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	del_timer_sync(&q->adapt_timer);
+	red_offload(sch, false);
 	qdisc_destroy(q->qdisc);
 }
 
@@ -218,13 +246,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 		red_start_of_idle_period(&q->vars);
 
 	sch_tree_unlock(sch);
+	red_offload(sch, true);
 	return 0;
 }
 
-static inline void red_adaptative_timer(unsigned long arg)
+static inline void red_adaptative_timer(struct timer_list *t)
 {
-	struct Qdisc *sch = (struct Qdisc *)arg;
-	struct red_sched_data *q = qdisc_priv(sch);
+	struct red_sched_data *q = from_timer(q, t, adapt_timer);
+	struct Qdisc *sch = q->sch;
 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 
 	spin_lock(root_lock);
@@ -238,10 +267,40 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	q->qdisc = &noop_qdisc;
-	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
+	q->sch = sch;
+	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
 	return red_change(sch, opt);
 }
 
+static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_red_qopt_offload hw_stats = {
+		.command = TC_RED_STATS,
+		.handle = sch->handle,
+		.parent = sch->parent,
+		{
+			.stats.bstats = &sch->bstats,
+			.stats.qstats = &sch->qstats,
+		},
+	};
+	int err;
+
+	opt->flags &= ~TC_RED_OFFLOADED;
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return 0;
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+					    &hw_stats);
+	if (err == -EOPNOTSUPP)
+		return 0;
+
+	if (!err)
+		opt->flags |= TC_RED_OFFLOADED;
+
+	return err;
+}
+
 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
@@ -255,8 +314,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 		.Plog		= q->parms.Plog,
 		.Scell_log	= q->parms.Scell_log,
 	};
+	int err;
 
 	sch->qstats.backlog = q->qdisc->qstats.backlog;
+	err = red_dump_offload(sch, &opt);
+	if (err)
+		goto nla_put_failure;
+
 	opts = nla_nest_start(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
@@ -273,6 +337,7 @@ nla_put_failure:
 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
 	struct tc_red_xstats st = {
 		.early	= q->stats.prob_drop + q->stats.forced_drop,
 		.pdrop	= q->stats.pdrop,
@@ -280,6 +345,26 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		.marked	= q->stats.prob_mark + q->stats.forced_mark,
 	};
 
+	if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
+		struct red_stats hw_stats = {0};
+		struct tc_red_qopt_offload hw_stats_request = {
+			.command = TC_RED_XSTATS,
+			.handle = sch->handle,
+			.parent = sch->parent,
+			{
+				.xstats = &hw_stats,
+			},
+		};
+		if (!dev->netdev_ops->ndo_setup_tc(dev,
+						   TC_SETUP_QDISC_RED,
+						   &hw_stats_request)) {
+			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
+			st.pdrop += hw_stats.pdrop;
+			st.other += hw_stats.other;
+			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
+		}
+	}
+
 	return gnet_stats_copy_app(d, &st, sizeof(st));
 }
 
@@ -311,15 +396,11 @@ static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long red_get(struct Qdisc *sch, u32 classid)
+static unsigned long red_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void red_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	if (!walker->stop) {
@@ -335,8 +416,7 @@ static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static const struct Qdisc_class_ops red_class_ops = {
 	.graft		=	red_graft,
 	.leaf		=	red_leaf,
-	.get		=	red_get,
-	.put		=	red_put,
+	.find		=	red_find,
 	.walk		=	red_walk,
 	.dump		=	red_dump_class,
 };
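The red_offload() path above introduces qdisc hardware offload through ndo_setup_tc with the new TC_SETUP_QDISC_RED type: parameters are pushed to the NIC on every change, stats are pulled at dump time, and -EOPNOTSUPP from the driver is treated as "not offloaded" rather than as a hard error. A rough sketch of the driver-side dispatch this enables; foo_red_replace/foo_red_destroy are hypothetical driver helpers, not part of this diff:

	static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
				void *type_data)
	{
		struct tc_red_qopt_offload *opt = type_data;

		if (type != TC_SETUP_QDISC_RED)
			return -EOPNOTSUPP;

		switch (opt->command) {
		case TC_RED_REPLACE:
			return foo_red_replace(dev, opt->handle, &opt->set);
		case TC_RED_DESTROY:
			return foo_red_destroy(dev, opt->handle);
		case TC_RED_STATS:
		case TC_RED_XSTATS:
			/* stats offload is optional; the qdisc keeps SW stats */
			return -EOPNOTSUPP;
		default:
			return -EOPNOTSUPP;
		}
	}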
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 11fb6ec878d6..0678debdd856 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -268,6 +268,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
 	case TC_ACT_QUEUED:
 	case TC_ACT_TRAP:
 		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		/* fall through */
 	case TC_ACT_SHOT:
 		return false;
 	}
@@ -553,7 +554,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
 	struct sfb_sched_data *q = qdisc_priv(sch);
 	int err;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
@@ -632,12 +633,12 @@ static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfb_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void sfb_put(struct Qdisc *sch, unsigned long arg)
+static void sfb_unbind(struct Qdisc *sch, unsigned long arg)
 {
 }
 
@@ -683,14 +684,13 @@ static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
 static const struct Qdisc_class_ops sfb_class_ops = {
 	.graft		=	sfb_graft,
 	.leaf		=	sfb_leaf,
-	.get		=	sfb_get,
-	.put		=	sfb_put,
+	.find		=	sfb_find,
 	.change		=	sfb_change_class,
 	.delete		=	sfb_delete,
 	.walk		=	sfb_walk,
 	.tcf_block	=	sfb_tcf_block,
 	.bind_tcf	=	sfb_bind,
-	.unbind_tcf	=	sfb_put,
+	.unbind_tcf	=	sfb_unbind,
 	.dump		=	sfb_dump_class,
 };
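As in sch_prio.c and sch_qfq.c above, the classify paths gain explicit /* fall through */ annotations: the stolen/queued/trapped verdicts set *qerr and then deliberately share TC_ACT_SHOT's early return, and the comment keeps -Wimplicit-fallthrough quiet. The shape of the pattern, as a hypothetical helper mirroring the switch bodies in this diff:

	/* Returns false when the verdict means the packet was consumed or
	 * dropped and classification must not proceed.
	 */
	static bool foo_verdict_ok(int act, int *qerr)
	{
		switch (act) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return false;
		}
		return true;
	}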
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f80ea2cc5f1f..890f4a4564e7 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -145,6 +145,7 @@ struct sfq_sched_data {
 	int		perturb_period;
 	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	struct timer_list perturb_timer;
+	struct Qdisc	*sch;
 };
 
 /*
@@ -189,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	case TC_ACT_QUEUED:
 	case TC_ACT_TRAP:
 		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		/* fall through */
 	case TC_ACT_SHOT:
 		return 0;
 	}
@@ -292,7 +294,7 @@ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
 	slot->skblist_prev = skb;
 }
 
-static unsigned int sfq_drop(struct Qdisc *sch)
+static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	sfq_index x, d = q->cur_depth;
@@ -310,9 +312,8 @@ drop:
 		slot->backlog -= len;
 		sfq_dec(q, x);
 		sch->q.qlen--;
-		qdisc_qstats_drop(sch);
 		qdisc_qstats_backlog_dec(sch, skb);
-		kfree_skb(skb);
+		qdisc_drop(skb, sch, to_free);
 		return len;
 	}
 
@@ -360,7 +361,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 	if (hash == 0) {
 		if (ret & __NET_XMIT_BYPASS)
 			qdisc_qstats_drop(sch);
-		kfree_skb(skb);
+		__qdisc_drop(skb, to_free);
 		return ret;
 	}
 	hash--;
@@ -437,6 +438,7 @@ congestion_drop:
 		qdisc_drop(head, sch, to_free);
 
 		slot_queue_add(slot, skb);
+		qdisc_tree_reduce_backlog(sch, 0, delta);
 		return NET_XMIT_CN;
 	}
 
@@ -464,12 +466,14 @@ enqueue:
 		return NET_XMIT_SUCCESS;
 
 	qlen = slot->qlen;
-	dropped = sfq_drop(sch);
+	dropped = sfq_drop(sch, to_free);
 	/* Return Congestion Notification only if we dropped a packet
 	 * from this flow.
 	 */
-	if (qlen != slot->qlen)
+	if (qlen != slot->qlen) {
+		qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
 		return NET_XMIT_CN;
+	}
 
 	/* As we dropped a packet, better let upper stack know this */
 	qdisc_tree_reduce_backlog(sch, 1, dropped);
@@ -602,10 +606,10 @@ drop:
 		qdisc_tree_reduce_backlog(sch, dropped, drop_len);
 }
 
-static void sfq_perturbation(unsigned long arg)
+static void sfq_perturbation(struct timer_list *t)
 {
-	struct Qdisc *sch = (struct Qdisc *)arg;
-	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
+	struct Qdisc *sch = q->sch;
 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 
 	spin_lock(root_lock);
@@ -625,6 +629,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen, dropped = 0;
 	struct red_parms *p = NULL;
+	struct sk_buff *to_free = NULL;
+	struct sk_buff *tail = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
@@ -671,8 +677,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	qlen = sch->q.qlen;
-	while (sch->q.qlen > q->limit)
-		dropped += sfq_drop(sch);
+	while (sch->q.qlen > q->limit) {
+		dropped += sfq_drop(sch, &to_free);
+		if (!tail)
+			tail = to_free;
+	}
+
+	rtnl_kfree_skbs(to_free, tail);
 	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
 
 	del_timer(&q->perturb_timer);
@@ -713,13 +724,12 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	int i;
 	int err;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
+
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
-	setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
-			       (unsigned long)sch);
-
 	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
 		q->dep[i].next = i + SFQ_MAX_FLOWS;
 		q->dep[i].prev = i + SFQ_MAX_FLOWS;
@@ -805,7 +815,7 @@ static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
 	return NULL;
 }
 
-static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
 {
 	return 0;
 }
@@ -818,7 +828,7 @@ static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
 	return 0;
 }
 
-static void sfq_put(struct Qdisc *q, unsigned long cl)
+static void sfq_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -882,11 +892,10 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 static const struct Qdisc_class_ops sfq_class_ops = {
 	.leaf		=	sfq_leaf,
-	.get		=	sfq_get,
-	.put		=	sfq_put,
+	.find		=	sfq_find,
 	.tcf_block	=	sfq_tcf_block,
 	.bind_tcf	=	sfq_bind,
-	.unbind_tcf	=	sfq_put,
+	.unbind_tcf	=	sfq_unbind,
 	.dump		=	sfq_dump_class,
 	.dump_stats	=	sfq_dump_class_stats,
 	.walk		=	sfq_walk,
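sfq_drop() now chains its victims onto a to_free list instead of calling kfree_skb() directly, so packets dropped while the qdisc tree is locked can be freed in one batch. __qdisc_drop() prepends to the list, so the first packet dropped ends up as the tail of the chain, which is why sfq_change() remembers it for rtnl_kfree_skbs(). A sketch of the idiom with a hypothetical my_drop() helper, not code from this diff:

	static void my_shrink_to_limit(struct Qdisc *sch, u32 limit)
	{
		struct sk_buff *to_free = NULL;
		struct sk_buff *tail = NULL;
		unsigned int qlen = sch->q.qlen;
		unsigned int dropped = 0;

		while (sch->q.qlen > limit) {
			dropped += my_drop(sch, &to_free);	/* hypothetical */
			if (!tail)
				tail = to_free;	/* first victim = chain tail */
		}
		rtnl_kfree_skbs(to_free, tail);	/* one batched free, RTNL held */
		qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
	}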
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b2e4b6ad241a..120f4f365967 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -425,12 +425,13 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
+	qdisc_watchdog_init(&q->watchdog, sch);
+	q->qdisc = &noop_qdisc;
+
 	if (opt == NULL)
 		return -EINVAL;
 
 	q->t_c = ktime_get_ns();
-	qdisc_watchdog_init(&q->watchdog, sch);
-	q->qdisc = &noop_qdisc;
 
 	return tbf_change(sch, opt);
 }
@@ -510,15 +511,11 @@ static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
+static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void tbf_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
 static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	if (!walker->stop) {
@@ -534,8 +531,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static const struct Qdisc_class_ops tbf_class_ops = {
 	.graft		=	tbf_graft,
 	.leaf		=	tbf_leaf,
-	.get		=	tbf_get,
-	.put		=	tbf_put,
+	.find		=	tbf_find,
 	.walk		=	tbf_walk,
 	.dump		=	tbf_dump_class,
 };
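The tbf_init() reordering follows the same rule as the tcf_block_get()/timer_setup() moves elsewhere in this series: anything the ->destroy() callback will unconditionally touch must be initialized before the first error return, because the qdisc core tears a qdisc down through ->destroy() even when ->init() fails. Sketched with hypothetical my_* names, not code from this diff:

	static int my_init(struct Qdisc *sch, struct nlattr *opt)
	{
		struct my_sched_data *q = qdisc_priv(sch);

		/* set up before any early return: ->destroy() will
		 * unconditionally cancel the watchdog and destroy q->qdisc
		 */
		qdisc_watchdog_init(&q->watchdog, sch);
		q->qdisc = &noop_qdisc;

		if (!opt)
			return -EINVAL;	/* destroy path is now safe */

		return my_change(sch, opt);	/* hypothetical */
	}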