diff options
Diffstat (limited to 'net/sched')
48 files changed, 734 insertions, 1142 deletions
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 336774a535c3..47ec2305f920 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -224,8 +224,8 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index) } EXPORT_SYMBOL(tcf_hash_search); -int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, - int bind) +bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, + int bind) { struct tcf_hashinfo *hinfo = tn->hinfo; struct tcf_common *p = NULL; @@ -235,9 +235,9 @@ int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a, p->tcfc_refcnt++; a->priv = p; a->hinfo = hinfo; - return 1; + return true; } - return 0; + return false; } EXPORT_SYMBOL(tcf_hash_check); @@ -283,10 +283,11 @@ err2: p->tcfc_index = index ? index : tcf_hash_new_index(tn); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; + p->tcfc_tm.firstuse = 0; if (est) { err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats, &p->tcfc_rate_est, - &p->tcfc_lock, est); + &p->tcfc_lock, NULL, est); if (err) { free_percpu(p->cpu_qstats); goto err2; @@ -503,8 +504,8 @@ nla_put_failure: } EXPORT_SYMBOL(tcf_action_dump_1); -int -tcf_action_dump(struct sk_buff *skb, struct list_head *actions, int bind, int ref) +int tcf_action_dump(struct sk_buff *skb, struct list_head *actions, + int bind, int ref) { struct tc_action *a; int err = -EINVAL; @@ -670,7 +671,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (err < 0) goto errout; - if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 || + if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfc_bstats) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfc_bstats, &p->tcfc_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, @@ -687,9 +688,9 @@ errout: return -1; } -static int -tca_get_fill(struct sk_buff *skb, struct list_head *actions, u32 portid, u32 seq, - u16 flags, int event, int bind, int ref) +static int tca_get_fill(struct sk_buff *skb, struct list_head *actions, + u32 portid, u32 seq, u16 flags, int event, int bind, + int ref) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -730,7 +731,8 @@ act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, + 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -838,7 +840,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (a.ops == NULL) /*some idjot trying to flush unknown action */ goto err_out; - nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); + nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, + sizeof(*t), 0); if (!nlh) goto out_module_put; t = nlmsg_data(nlh); @@ -1001,7 +1004,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; - if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETACTION) && + !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); @@ -1118,7 +1122,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); ret = skb->len; } else - nla_nest_cancel(skb, nest); + nlmsg_trim(skb, b); nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).portid && ret) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c7123e01c2ca..ef74bffa6101 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -154,10 +154,7 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, if (ret) goto nla_put_failure; - tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install); - tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); - tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); - + tcf_tm_dump(&tm, &prog->tcf_tm); if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm, TCA_ACT_BPF_PAD)) goto nla_put_failure; @@ -172,7 +169,8 @@ nla_put_failure: static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, - [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, + [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, + .len = ACT_BPF_NAME_LEN }, [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -225,15 +223,10 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); - fp = bpf_prog_get(bpf_fd); + fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT); if (IS_ERR(fp)) return PTR_ERR(fp); - if (fp->type != BPF_PROG_TYPE_SCHED_ACT) { - bpf_prog_put(fp); - return -EINVAL; - } - if (tb[TCA_ACT_BPF_NAME]) { name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), nla_len(tb[TCA_ACT_BPF_NAME]), diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 2ba700c765e0..35a5270f289d 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -44,7 +44,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, int proto; spin_lock(&ca->tcf_lock); - ca->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&ca->tcf_tm); bstats_update(&ca->tcf_bstats, skb); if (skb->protocol == htons(ETH_P_IP)) { @@ -160,9 +160,7 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); + tcf_tm_dump(&t, &ci->tcf_tm); if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t, TCA_CONNMARK_PAD)) goto nla_put_failure; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 28e934ed038a..dcd9ababd351 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -501,7 +501,7 @@ static int tcf_csum(struct sk_buff *skb, u32 update_flags; spin_lock(&p->tcf_lock); - p->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&p->tcf_tm); bstats_update(&p->tcf_bstats, skb); action = p->tcf_action; update_flags = p->update_flags; @@ -546,9 +546,8 @@ static int tcf_csum_dump(struct sk_buff *skb, if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + + tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD)) goto nla_put_failure; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ec5cc8435238..19058a7f3e5c 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -162,7 +162,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, tm->lastuse = lastuse; } -static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_gact *gact = a->priv; @@ -188,9 +189,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int goto nla_put_failure; } #endif - t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); + tcf_tm_dump(&t, &gact->tcf_tm); if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 658046dfe02d..845ab5119c05 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -106,9 +106,9 @@ int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_get_meta_u16); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u32), gfp); if (!mi->metaval) return -ENOMEM; @@ -116,9 +116,9 @@ int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) } EXPORT_SYMBOL_GPL(ife_alloc_meta_u32); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u16), gfp); if (!mi->metaval) return -ENOMEM; @@ -240,10 +240,10 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len) + void *val, int len, bool exists) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -251,11 +251,13 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); request_module("ifemeta%u", metaid); rtnl_lock(); - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -272,10 +274,10 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len) + int len, bool atomic) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -284,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, if (!ops) return -ENOENT; - mi = kzalloc(sizeof(*mi), GFP_KERNEL); + mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL); if (!mi) { /*put back what find_ife_oplist took */ module_put(ops->owner); @@ -294,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, mi->metaid = metaid; mi->ops = ops; if (len > 0) { - ret = ops->alloc(mi, metaval); + ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); module_put(ops->owner); @@ -313,11 +315,13 @@ static int use_all_metadata(struct tcf_ife_info *ife) int rc = 0; int installed = 0; + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0); + rc = add_metainfo(ife, o->metaid, NULL, 0, true); if (rc == 0) installed += 1; } + read_unlock(&ife_mod_lock); if (installed) return 0; @@ -385,8 +389,9 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind) spin_unlock_bh(&ife->tcf_lock); } -/* under ife->tcf_lock */ -static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) +/* under ife->tcf_lock for existing action */ +static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, + bool exists) { int len = 0; int rc = 0; @@ -398,11 +403,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len); + rc = load_metaops_and_vet(ife, i, val, len, exists); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } @@ -423,7 +428,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, u16 ife_type = 0; u8 *daddr = NULL; u8 *saddr = NULL; - int ret = 0, exists = 0; + bool exists = false; + int ret = 0; int err; err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy); @@ -474,7 +480,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, saddr = nla_data(tb[TCA_IFE_SMAC]); } - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { @@ -504,11 +511,12 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } - err = populate_metalist(ife, tb2); + err = populate_metalist(ife, tb2, exists); if (err) goto metadata_parse_err; @@ -523,12 +531,14 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } } - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, a); @@ -553,9 +563,7 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put(skb, TCA_IFE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(ife->tcf_tm.expires); + tcf_tm_dump(&t, &ife->tcf_tm); if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD)) goto nla_put_failure; @@ -623,7 +631,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, spin_lock(&ife->tcf_lock); bstats_update(&ife->tcf_bstats, skb); - ife->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&ife->tcf_tm); spin_unlock(&ife->tcf_lock); ifehdrln = ntohs(ifehdrln); @@ -711,7 +719,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, spin_lock(&ife->tcf_lock); bstats_update(&ife->tcf_bstats, skb); - ife->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&ife->tcf_tm); if (!metalen) { /* no metadata to send */ /* abuse overlimits to count when we allow packet @@ -802,7 +810,7 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, pr_info_ratelimited("unknown failure(policy neither de/encode\n"); spin_lock(&ife->tcf_lock); bstats_update(&ife->tcf_bstats, skb); - ife->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&ife->tcf_tm); ife->tcf_qstats.drops++; spin_unlock(&ife->tcf_lock); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 9f002ada7074..b8c50600697a 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -34,7 +34,8 @@ static int ipt_net_id; static int xt_net_id; -static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook) +static int ipt_init_target(struct xt_entry_target *t, char *table, + unsigned int hook) { struct xt_tgchk_param par; struct xt_target *target; @@ -96,7 +97,8 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, struct tcf_ipt *ipt; struct xt_entry_target *td, *t; char *tname; - int ret = 0, err, exists = 0; + bool exists = false; + int ret = 0, err; u32 hook = 0; u32 index = 0; @@ -121,10 +123,13 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) + if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } - if (!tcf_hash_check(tn, index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind, false); if (ret) @@ -212,7 +217,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, spin_lock(&ipt->tcf_lock); - ipt->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&ipt->tcf_tm); bstats_update(&ipt->tcf_bstats, skb); /* yes, we have to worry about both in and out dev @@ -242,7 +247,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, default: net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n", ret); - result = TC_POLICE_OK; + result = TC_ACT_OK; break; } spin_unlock(&ipt->tcf_lock); @@ -250,7 +255,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, } -static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_ipt *ipt = a->priv; @@ -277,11 +283,11 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) || nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname)) goto nla_put_failure; - tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); - tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); - tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); + + tcf_tm_dump(&tm, &ipt->tcf_tm); if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) goto nla_put_failure; + kfree(t); return skb->len; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 128942bc9e42..5b135d357e1e 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -62,7 +62,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct tc_mirred *parm; struct tcf_mirred *m; struct net_device *dev; - int ret, ok_push = 0, exists = 0; + int ret, ok_push = 0; + bool exists = false; if (nla == NULL) return -EINVAL; @@ -157,7 +158,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, u32 at; tcf_lastuse_update(&m->tcf_tm); - bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); rcu_read_lock(); @@ -219,9 +219,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(m->tcf_tm.expires); + + tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c0a879f940de..06ccb03f25da 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -103,7 +103,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, spin_lock(&p->tcf_lock); - p->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&p->tcf_tm); old_addr = p->old_addr; new_addr = p->new_addr; mask = p->mask; @@ -264,9 +264,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + + tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD)) goto nla_put_failure; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c6e18f230af6..82d3c1479029 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -121,7 +121,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, spin_lock(&p->tcf_lock); - p->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { struct tc_pedit_key *tkey = p->tcfp_keys; @@ -200,11 +200,11 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_PEDIT_PARMS, s, opt)) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + + tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD)) goto nla_put_failure; + kfree(opt); return skb->len; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b884dae692a1..1e8ede3955f4 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -38,7 +38,7 @@ struct tcf_police { bool peak_present; }; #define to_police(pc) \ - container_of(pc, struct tcf_police, common) + container_of(pc->priv, struct tcf_police, common) #define POL_TAB_MASK 15 @@ -115,18 +115,16 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_RESULT] = { .type = NLA_U32 }, }; -static int tcf_act_police_locate(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_act_police_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) { - unsigned int h; int ret = 0, err; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; struct tc_action_net *tn = net_generic(net, police_net_id); - struct tcf_hashinfo *hinfo = tn->hinfo; int size; if (nla == NULL) @@ -145,7 +143,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (parm->index) { if (tcf_hash_search(tn, a, parm->index)) { - police = to_police(a->priv); + police = to_police(a); if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; @@ -156,16 +154,15 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, /* not replacing */ return -EEXIST; } + } else { + ret = tcf_hash_create(tn, parm->index, NULL, a, + sizeof(*police), bind, false); + if (ret) + return ret; + ret = ACT_P_CREATED; } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (police == NULL) - return -ENOMEM; - ret = ACT_P_CREATED; - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - if (bind) - police->tcf_bindcnt = 1; + police = to_police(a); override: if (parm->rate.rate) { err = -ENOMEM; @@ -185,7 +182,8 @@ override: if (est) { err = gen_replace_estimator(&police->tcf_bstats, NULL, &police->tcf_rate_est, - &police->tcf_lock, est); + &police->tcf_lock, + NULL, est); if (err) goto failure_unlock; } else if (tb[TCA_POLICE_AVRATE] && @@ -237,16 +235,8 @@ override: return ret; police->tcfp_t_c = ktime_get_ns(); - police->tcf_index = parm->index ? parm->index : - tcf_hash_new_index(tn); - police->tcf_tm.install = jiffies; - police->tcf_tm.lastuse = jiffies; - h = tcf_hash(police->tcf_index, POL_TAB_MASK); - spin_lock_bh(&hinfo->lock); - hlist_add_head(&police->tcf_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + tcf_hash_insert(tn, a); - a->priv = police; return ret; failure_unlock: @@ -255,7 +245,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - kfree(police); + tcf_hash_cleanup(a, est); return err; } @@ -347,6 +337,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse); + t.firstuse = jiffies_to_clock_t(jiffies - police->tcf_tm.firstuse); t.expires = jiffies_to_clock_t(police->tcf_tm.expires); if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD)) goto nla_put_failure; @@ -375,7 +366,7 @@ static struct tc_action_ops act_police_ops = { .owner = THIS_MODULE, .act = tcf_act_police, .dump = tcf_act_police_dump, - .init = tcf_act_police_locate, + .init = tcf_act_police_init, .walk = tcf_act_police_walker, .lookup = tcf_police_search, }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e42f8daca147..318328d34d12 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -35,7 +35,7 @@ static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, struct tcf_defact *d = a->priv; spin_lock(&d->tcf_lock); - d->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&d->tcf_tm); bstats_update(&d->tcf_bstats, skb); /* print policy string followed by _ then packet count @@ -86,8 +86,9 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_DEF_MAX + 1]; struct tc_defact *parm; struct tcf_defact *d; + bool exists = false; + int ret = 0, err; char *defdata; - int ret = 0, err, exists = 0; if (nla == NULL) return -EINVAL; @@ -158,9 +159,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt) || nla_put_string(skb, TCA_DEF_DATA, d->tcfd_defdata)) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + + tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e928802966bc..8e573c0f8742 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -37,7 +37,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_skbedit *d = a->priv; spin_lock(&d->tcf_lock); - d->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&d->tcf_tm); bstats_update(&d->tcf_bstats, skb); if (d->flags & SKBEDIT_F_PRIORITY) @@ -47,6 +47,8 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, skb_set_queue_mapping(skb, d->queue_mapping); if (d->flags & SKBEDIT_F_MARK) skb->mark = d->mark; + if (d->flags & SKBEDIT_F_PTYPE) + skb->pkt_type = d->ptype; spin_unlock(&d->tcf_lock); return d->tcf_action; @@ -57,6 +59,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, + [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, @@ -68,8 +71,9 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tc_skbedit *parm; struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL; - u16 *queue_mapping = NULL; - int ret = 0, err, exists = 0; + u16 *queue_mapping = NULL, *ptype = NULL; + bool exists = false; + int ret = 0, err; if (nla == NULL) return -EINVAL; @@ -91,6 +95,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); } + if (tb[TCA_SKBEDIT_PTYPE] != NULL) { + ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]); + if (!skb_pkt_type_ok(*ptype)) + return -EINVAL; + flags |= SKBEDIT_F_PTYPE; + } + if (tb[TCA_SKBEDIT_MARK] != NULL) { flags |= SKBEDIT_F_MARK; mark = nla_data(tb[TCA_SKBEDIT_MARK]); @@ -131,6 +142,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, d->queue_mapping = *queue_mapping; if (flags & SKBEDIT_F_MARK) d->mark = *mark; + if (flags & SKBEDIT_F_PTYPE) + d->ptype = *ptype; d->tcf_action = parm->action; @@ -157,20 +170,19 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if ((d->flags & SKBEDIT_F_PRIORITY) && - nla_put(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), - &d->priority)) + nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority)) goto nla_put_failure; if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) && - nla_put(skb, TCA_SKBEDIT_QUEUE_MAPPING, - sizeof(d->queue_mapping), &d->queue_mapping)) + nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping)) goto nla_put_failure; if ((d->flags & SKBEDIT_F_MARK) && - nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark), - &d->mark)) + nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark)) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + if ((d->flags & SKBEDIT_F_PTYPE) && + nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype)) + goto nla_put_failure; + + tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index ac4adc812c12..db9b7ed570ba 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -31,7 +31,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, int err; spin_lock(&v->tcf_lock); - v->tcf_tm.lastuse = jiffies; + tcf_lastuse_update(&v->tcf_tm); bstats_update(&v->tcf_bstats, skb); action = v->tcf_action; @@ -77,8 +77,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, int action; __be16 push_vid = 0; __be16 push_proto = 0; - int ret = 0, exists = 0; - int err; + bool exists = false; + int ret = 0, err; if (!nla) return -EINVAL; @@ -179,12 +179,11 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (v->tcfv_action == TCA_VLAN_ACT_PUSH && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || - nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, v->tcfv_push_proto))) + nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, + v->tcfv_push_proto))) goto nla_put_failure; - t.install = jiffies_to_clock_t(jiffies - v->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - v->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(v->tcf_tm.expires); + tcf_tm_dump(&t, &v->tcf_tm); if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD)) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a75864d93142..843a716a4303 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -103,6 +103,17 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, unsigned long fh, int event); +static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, + struct tcf_proto __rcu **chain, int event) +{ + struct tcf_proto __rcu **it_chain; + struct tcf_proto *tp; + + for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL; + it_chain = &tp->next) + tfilter_notify(net, oskb, n, tp, 0, event); +} /* Select new prio value from the range, managed by kernel. */ @@ -156,11 +167,23 @@ replay: cl = 0; if (prio == 0) { - /* If no priority is given, user wants we allocated it. */ - if (n->nlmsg_type != RTM_NEWTFILTER || - !(n->nlmsg_flags & NLM_F_CREATE)) + switch (n->nlmsg_type) { + case RTM_DELTFILTER: + if (protocol || t->tcm_handle || tca[TCA_KIND]) + return -ENOENT; + break; + case RTM_NEWTFILTER: + /* If no priority is provided by the user, + * we allocate one. + */ + if (n->nlmsg_flags & NLM_F_CREATE) { + prio = TC_H_MAKE(0x80000000U, 0U); + break; + } + /* fall-through */ + default: return -ENOENT; - prio = TC_H_MAKE(0x80000000U, 0U); + } } /* Find head of filter chain. */ @@ -200,6 +223,12 @@ replay: err = -EINVAL; if (chain == NULL) goto errout; + if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { + tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); + tcf_destroy_chain(chain); + err = 0; + goto errout; + } /* Check the chain for existence of proto-tcf with this priority */ for (back = chain; @@ -351,8 +380,9 @@ errout: return err; } -static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, - unsigned long fh, u32 portid, u32 seq, u16 flags, int event) +static int tcf_fill_node(struct net *net, struct sk_buff *skb, + struct tcf_proto *tp, unsigned long fh, u32 portid, + u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -474,9 +504,11 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) TC_H_MIN(tcm->tcm_info) != tp->protocol) continue; if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + memset(&cb->args[1], 0, + sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(net, skb, tp, 0, NETLINK_CB(cb->skb).portid, + if (tcf_fill_node(net, skb, tp, 0, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) break; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 7b342c779da7..c3002c2c68bb 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -272,15 +272,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); - fp = bpf_prog_get(bpf_fd); + fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); if (IS_ERR(fp)) return PTR_ERR(fp); - if (fp->type != BPF_PROG_TYPE_SCHED_CLS) { - bpf_prog_put(fp); - return -EINVAL; - } - if (tb[TCA_BPF_NAME]) { name = kmemdup(nla_data(tb[TCA_BPF_NAME]), nla_len(tb[TCA_BPF_NAME]), diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 730aacafc22d..5060801a2f6d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -66,6 +66,7 @@ struct cls_fl_filter { struct fl_flow_key key; struct list_head list; u32 handle; + u32 flags; struct rcu_head rcu; }; @@ -123,6 +124,9 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; + if (!atomic_read(&head->ht.nelems)) + return -1; + fl_clear_masked_range(&skb_key, &head->mask); skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, @@ -136,7 +140,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, f = rhashtable_lookup_fast(&head->ht, fl_key_get_start(&skb_mkey, &head->mask), head->ht_params); - if (f) { + if (f && !tc_skip_sw(f->flags)) { *res = f->res; return tcf_exts_exec(skb, &f->exts, res); } @@ -171,7 +175,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_DESTROY; @@ -183,19 +187,20 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie) dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); } -static void fl_hw_replace_filter(struct tcf_proto *tp, - struct flow_dissector *dissector, - struct fl_flow_key *mask, - struct fl_flow_key *key, - struct tcf_exts *actions, - unsigned long cookie, u32 flags) +static int fl_hw_replace_filter(struct tcf_proto *tp, + struct flow_dissector *dissector, + struct fl_flow_key *mask, + struct fl_flow_key *key, + struct tcf_exts *actions, + unsigned long cookie, u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; + int err; - if (!tc_should_offload(dev, flags)) - return; + if (!tc_should_offload(dev, tp, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; offload.command = TC_CLSFLOWER_REPLACE; offload.cookie = cookie; @@ -207,7 +212,12 @@ static void fl_hw_replace_filter(struct tcf_proto *tp, tc.type = TC_SETUP_CLSFLOWER; tc.cls_flower = &offload; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + + if (tc_skip_sw(flags)) + return err; + + return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) @@ -216,7 +226,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_STATS; @@ -524,7 +534,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_filter *fnew; struct nlattr *tb[TCA_FLOWER_MAX + 1]; struct fl_flow_mask mask = {}; - u32 flags = 0; int err; if (!tca[TCA_OPTIONS]) @@ -552,8 +561,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } fnew->handle = handle; - if (tb[TCA_FLOWER_FLAGS]) - flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); + if (tb[TCA_FLOWER_FLAGS]) { + fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); + + if (!tc_flags_valid(fnew->flags)) { + err = -EINVAL; + goto errout; + } + } err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); if (err) @@ -563,19 +578,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout; - err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, - head->ht_params); + if (!tc_skip_sw(fnew->flags)) { + err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, + head->ht_params); + if (err) + goto errout; + } + + err = fl_hw_replace_filter(tp, + &head->dissector, + &mask.key, + &fnew->key, + &fnew->exts, + (unsigned long)fnew, + fnew->flags); if (err) goto errout; - fl_hw_replace_filter(tp, - &head->dissector, - &mask.key, - &fnew->key, - &fnew->exts, - (unsigned long)fnew, - flags); - if (fold) { rhashtable_remove_fast(&head->ht, &fold->ht_node, head->ht_params); @@ -734,6 +753,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->tp.dst)))) goto nla_put_failure; + nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); + if (tcf_exts_dump(skb, &f->exts)) goto nla_put_failure; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 079b43b3c5d2..ffe593efe930 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -440,7 +440,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; offload.cls_u32->knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, @@ -457,20 +457,21 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; + if (!tc_should_offload(dev, tp, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; + offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { - offload.cls_u32->command = TC_CLSU32_NEW_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + offload.cls_u32->command = TC_CLSU32_NEW_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } @@ -484,7 +485,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; offload.cls_u32->hnode.divisor = h->divisor; offload.cls_u32->hnode.handle = h->handle; @@ -507,27 +508,28 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { - offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; - offload.cls_u32->knode.handle = n->handle; - offload.cls_u32->knode.fshift = n->fshift; + if (!tc_should_offload(dev, tp, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; + + offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; + offload.cls_u32->knode.handle = n->handle; + offload.cls_u32->knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK - offload.cls_u32->knode.val = n->val; - offload.cls_u32->knode.mask = n->mask; + offload.cls_u32->knode.val = n->val; + offload.cls_u32->knode.mask = n->mask; #else - offload.cls_u32->knode.val = 0; - offload.cls_u32->knode.mask = 0; + offload.cls_u32->knode.val = 0; + offload.cls_u32->knode.mask = 0; #endif - offload.cls_u32->knode.sel = &n->sel; - offload.cls_u32->knode.exts = &n->exts; - if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + offload.cls_u32->knode.sel = &n->sel; + offload.cls_u32->knode.exts = &n->exts; + if (n->ht_down) + offload.cls_u32->knode.link_handle = n->ht_down->handle; + + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } @@ -863,7 +865,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_FLAGS]) { flags = nla_get_u32(tb[TCA_U32_FLAGS]); if (!tc_flags_valid(flags)) - return err; + return -EINVAL; } n = (struct tc_u_knode *)*arg; @@ -921,11 +923,17 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + + err = u32_replace_hw_hnode(tp, ht, flags); + if (err) { + kfree(ht); + return err; + } + RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; - u32_replace_hw_hnode(tp, ht, flags); return 0; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ddf047df5361..12ebde845523 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -95,8 +95,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, Expected action: do not backoff, but wait until queue will clear. NET_XMIT_CN - probably this packet enqueued, but another one dropped. Expected action: backoff or ignore - NET_XMIT_POLICED - dropped by police. - Expected action: backoff or error to real-time apps. Auxiliary routines: @@ -583,7 +581,6 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) timer); rcu_read_lock(); - qdisc_unthrottled(wd->qdisc); __netif_schedule(qdisc_root(wd->qdisc)); rcu_read_unlock(); @@ -598,15 +595,12 @@ void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_watchdog_init); -void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle) +void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) { if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) return; - if (throttle) - qdisc_throttled(wd->qdisc); - if (wd->last_expires == expires) return; @@ -620,7 +614,6 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { hrtimer_cancel(&wd->timer); - qdisc_unthrottled(wd->qdisc); } EXPORT_SYMBOL(qdisc_watchdog_cancel); @@ -982,7 +975,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { - spinlock_t *root_lock; + seqcount_t *running; err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) @@ -991,14 +984,15 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if ((sch->parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS) && (!p || !(p->flags & TCQ_F_MQROOT))) - root_lock = qdisc_root_sleeping_lock(sch); + running = qdisc_root_sleeping_running(sch); else - root_lock = qdisc_lock(sch); + running = &sch->running; err = gen_new_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, - root_lock, + NULL, + running, tca[TCA_RATE]); if (err) goto err_out4; @@ -1061,7 +1055,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) gen_replace_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); } out: @@ -1369,8 +1364,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - qdisc_root_sleeping_lock(q), &d, - TCA_PAD) < 0) + NULL, &d, TCA_PAD) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1381,7 +1375,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, cpu_qstats = q->cpu_qstats; } - if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), + &d, cpu_bstats, &q->bstats) < 0 || gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; @@ -1684,8 +1679,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - qdisc_root_sleeping_lock(q), &d, - TCA_PAD) < 0) + NULL, &d, TCA_PAD) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 1911af3ca7c0..481e4f12aeb4 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -357,16 +357,17 @@ static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch, /* --------------------------- Qdisc operations ---------------------------- */ -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow; struct tcf_result res; int result; - int ret = NET_XMIT_POLICED; + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - result = TC_POLICE_OK; /* be nice to gcc */ + result = TC_ACT_OK; /* be nice to gcc */ flow = NULL; if (TC_H_MAJ(skb->priority) != sch->handle || !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) { @@ -398,12 +399,12 @@ done: switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: - kfree_skb(skb); + __qdisc_drop(skb, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: - kfree_skb(skb); + __qdisc_drop(skb, to_free); goto drop; - case TC_POLICE_RECLASSIFY: + case TC_ACT_RECLASSIFY: if (flow->excess) flow = flow->excess; else @@ -413,7 +414,7 @@ done: #endif } - ret = qdisc_enqueue(skb, flow->q); + ret = qdisc_enqueue(skb, flow->q, to_free); if (ret != NET_XMIT_SUCCESS) { drop: __maybe_unused if (net_xmit_drop_count(ret)) { @@ -519,20 +520,6 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch) return p->link.q->ops->peek(p->link.q); } -static unsigned int atm_tc_drop(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - unsigned int len; - - pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) - return len; - } - return 0; -} - static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) { struct atm_qdisc_data *p = qdisc_priv(sch); @@ -637,7 +624,8 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct atm_flow_data *flow = (struct atm_flow_data *)arg; - if (gnet_stats_copy_basic(d, NULL, &flow->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &flow->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) return -1; @@ -671,7 +659,6 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = { .enqueue = atm_tc_enqueue, .dequeue = atm_tc_dequeue, .peek = atm_tc_peek, - .drop = atm_tc_drop, .init = atm_tc_init, .reset = atm_tc_reset, .destroy = atm_tc_destroy, diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 3fee70d9814f..c98a61e980ba 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -17,9 +17,10 @@ #include <linux/skbuff.h> #include <net/pkt_sched.h> -static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { - qdisc_drop(skb, sch); + qdisc_drop(skb, sch, to_free); return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index baafddf229ce..beb554aa8cfb 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -80,10 +80,6 @@ struct cbq_class { unsigned char priority; /* class priority */ unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ - unsigned char ovl_strategy; -#ifdef CONFIG_NET_CLS_ACT - unsigned char police; -#endif u32 defmap; @@ -94,10 +90,6 @@ struct cbq_class { u32 avpkt; struct qdisc_rate_table *R_tab; - /* Overlimit strategy parameters */ - void (*overlimit)(struct cbq_class *cl); - psched_tdiff_t penalty; - /* General scheduler (WRR) parameters */ long allot; long quantum; /* Allotment per WRR round */ @@ -353,7 +345,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) { int toplevel = q->toplevel; - if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { + if (toplevel > cl->level) { psched_time_t now = psched_get_time(); do { @@ -366,7 +358,8 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) } static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct cbq_sched_data *q = qdisc_priv(sch); int uninitialized_var(ret); @@ -378,14 +371,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (cl == NULL) { if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return ret; } -#ifdef CONFIG_NET_CLS_ACT - cl->q->__parent = sch; -#endif - ret = qdisc_enqueue(skb, cl->q); + ret = qdisc_enqueue(skb, cl->q, to_free); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; cbq_mark_toplevel(q, cl); @@ -402,11 +392,8 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -/* Overlimit actions */ - -/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */ - -static void cbq_ovl_classic(struct cbq_class *cl) +/* Overlimit action: penalize leaf class by adding offtime */ +static void cbq_overlimit(struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(cl->qdisc); psched_tdiff_t delay = cl->undertime - q->now; @@ -456,99 +443,6 @@ static void cbq_ovl_classic(struct cbq_class *cl) } } -/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when - * they go overlimit - */ - -static void cbq_ovl_rclassic(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *this = cl; - - do { - if (cl->level > q->toplevel) { - cl = NULL; - break; - } - } while ((cl = cl->borrow) != NULL); - - if (cl == NULL) - cl = this; - cbq_ovl_classic(cl); -} - -/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */ - -static void cbq_ovl_delay(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - psched_tdiff_t delay = cl->undertime - q->now; - - if (test_bit(__QDISC_STATE_DEACTIVATED, - &qdisc_root_sleeping(cl->qdisc)->state)) - return; - - if (!cl->delayed) { - psched_time_t sched = q->now; - ktime_t expires; - - delay += cl->offtime; - if (cl->avgidle < 0) - delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); - if (cl->avgidle < cl->minidle) - cl->avgidle = cl->minidle; - cl->undertime = q->now + delay; - - if (delay > 0) { - sched += delay + cl->penalty; - cl->penalized = sched; - cl->cpriority = TC_CBQ_MAXPRIO; - q->pmask |= (1<<TC_CBQ_MAXPRIO); - - expires = ns_to_ktime(PSCHED_TICKS2NS(sched)); - if (hrtimer_try_to_cancel(&q->delay_timer) && - ktime_to_ns(ktime_sub( - hrtimer_get_expires(&q->delay_timer), - expires)) > 0) - hrtimer_set_expires(&q->delay_timer, expires); - hrtimer_restart(&q->delay_timer); - cl->delayed = 1; - cl->xstats.overactions++; - return; - } - delay = 1; - } - if (q->wd_expires == 0 || q->wd_expires > delay) - q->wd_expires = delay; -} - -/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */ - -static void cbq_ovl_lowprio(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - - cl->penalized = q->now + cl->penalty; - - if (cl->cpriority != cl->priority2) { - cl->cpriority = cl->priority2; - q->pmask |= (1<<cl->cpriority); - cl->xstats.overactions++; - } - cbq_ovl_classic(cl); -} - -/* TC_CBQ_OVL_DROP: penalize class by dropping */ - -static void cbq_ovl_drop(struct cbq_class *cl) -{ - if (cl->q->ops->drop) - if (cl->q->ops->drop(cl->q)) - cl->qdisc->q.qlen--; - cl->xstats.overactions++; - cbq_ovl_classic(cl); -} - static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio, psched_time_t now) { @@ -620,45 +514,10 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED); } - qdisc_unthrottled(sch); __netif_schedule(qdisc_root(sch)); return HRTIMER_NORESTART; } -#ifdef CONFIG_NET_CLS_ACT -static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) -{ - struct Qdisc *sch = child->__parent; - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = q->rx_class; - - q->rx_class = NULL; - - if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) { - int ret; - - cbq_mark_toplevel(q, cl); - - q->rx_class = cl; - cl->q->__parent = sch; - - ret = qdisc_enqueue(skb, cl->q); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - if (!cl->next_alive) - cbq_activate_class(cl); - return 0; - } - if (net_xmit_drop_count(ret)) - qdisc_qstats_drop(sch); - return 0; - } - - qdisc_qstats_drop(sch); - return -1; -} -#endif - /* * It is mission critical procedure. * @@ -807,7 +666,7 @@ cbq_under_limit(struct cbq_class *cl) cl = cl->borrow; if (!cl) { this_cl->qstats.overlimits++; - this_cl->overlimit(this_cl); + cbq_overlimit(this_cl); return NULL; } if (cl->level > q->toplevel) @@ -960,7 +819,6 @@ cbq_dequeue(struct Qdisc *sch) if (skb) { qdisc_bstats_update(sch, skb); sch->q.qlen--; - qdisc_unthrottled(sch); return skb; } @@ -1166,31 +1024,6 @@ static void cbq_link_class(struct cbq_class *this) } } -static unsigned int cbq_drop(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl, *cl_head; - int prio; - unsigned int len; - - for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { - cl_head = q->active[prio]; - if (!cl_head) - continue; - - cl = cl_head; - do { - if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) { - sch->q.qlen--; - if (!cl->q->q.qlen) - cbq_deactivate_class(cl); - return len; - } - } while ((cl = cl->next_alive) != cl_head); - } - return 0; -} - static void cbq_reset(struct Qdisc *sch) { @@ -1280,50 +1113,6 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) return 0; } -static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) -{ - switch (ovl->strategy) { - case TC_CBQ_OVL_CLASSIC: - cl->overlimit = cbq_ovl_classic; - break; - case TC_CBQ_OVL_DELAY: - cl->overlimit = cbq_ovl_delay; - break; - case TC_CBQ_OVL_LOWPRIO: - if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO || - ovl->priority2 - 1 <= cl->priority) - return -EINVAL; - cl->priority2 = ovl->priority2 - 1; - cl->overlimit = cbq_ovl_lowprio; - break; - case TC_CBQ_OVL_DROP: - cl->overlimit = cbq_ovl_drop; - break; - case TC_CBQ_OVL_RCLASSIC: - cl->overlimit = cbq_ovl_rclassic; - break; - default: - return -EINVAL; - } - cl->penalty = ovl->penalty; - return 0; -} - -#ifdef CONFIG_NET_CLS_ACT -static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p) -{ - cl->police = p->police; - - if (cl->q->handle) { - if (p->police == TC_POLICE_RECLASSIFY) - cl->q->reshape_fail = cbq_reshape_fail; - else - cl->q->reshape_fail = NULL; - } - return 0; -} -#endif - static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt) { cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange); @@ -1375,8 +1164,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.priority = TC_CBQ_MAXPRIO - 1; q->link.priority2 = TC_CBQ_MAXPRIO - 1; q->link.cpriority = TC_CBQ_MAXPRIO - 1; - q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; - q->link.overlimit = cbq_ovl_classic; q->link.allot = psched_mtu(qdisc_dev(sch)); q->link.quantum = q->link.allot; q->link.weight = q->link.R_tab->rate.rate; @@ -1463,24 +1250,6 @@ nla_put_failure: return -1; } -static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_ovl opt; - - opt.strategy = cl->ovl_strategy; - opt.priority2 = cl->priority2 + 1; - opt.pad = 0; - opt.penalty = cl->penalty; - if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); @@ -1500,36 +1269,11 @@ nla_put_failure: return -1; } -#ifdef CONFIG_NET_CLS_ACT -static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_police opt; - - if (cl->police) { - opt.police = cl->police; - opt.__res1 = 0; - opt.__res2 = 0; - if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt)) - goto nla_put_failure; - } - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} -#endif - static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) { if (cbq_dump_lss(skb, cl) < 0 || cbq_dump_rate(skb, cl) < 0 || cbq_dump_wrr(skb, cl) < 0 || - cbq_dump_ovl(skb, cl) < 0 || -#ifdef CONFIG_NET_CLS_ACT - cbq_dump_police(skb, cl) < 0 || -#endif cbq_dump_fopt(skb, cl) < 0) return -1; return 0; @@ -1600,7 +1344,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) return -1; @@ -1618,11 +1363,6 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, &pfifo_qdisc_ops, cl->common.classid); if (new == NULL) return -ENOBUFS; - } else { -#ifdef CONFIG_NET_CLS_ACT - if (cl->police == TC_POLICE_RECLASSIFY) - new->reshape_fail = cbq_reshape_fail; -#endif } *old = qdisc_replace(sch, new, &cl->q); @@ -1735,6 +1475,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (err < 0) return err; + if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) + return -EOPNOTSUPP; + if (cl) { /* Check parent */ if (parentid) { @@ -1755,7 +1498,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { qdisc_put_rtab(rtab); @@ -1782,14 +1526,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); } - if (tb[TCA_CBQ_OVL_STRATEGY]) - cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY])); - -#ifdef CONFIG_NET_CLS_ACT - if (tb[TCA_CBQ_POLICE]) - cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE])); -#endif - if (tb[TCA_CBQ_FOPT]) cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); @@ -1848,7 +1584,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { kfree(cl); @@ -1884,13 +1621,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->maxidle = q->link.maxidle; if (cl->avpkt == 0) cl->avpkt = q->link.avpkt; - cl->overlimit = cbq_ovl_classic; - if (tb[TCA_CBQ_OVL_STRATEGY]) - cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY])); -#ifdef CONFIG_NET_CLS_ACT - if (tb[TCA_CBQ_POLICE]) - cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE])); -#endif if (tb[TCA_CBQ_FOPT]) cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); sch_tree_unlock(sch); @@ -2035,7 +1765,6 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { .enqueue = cbq_enqueue, .dequeue = cbq_dequeue, .peek = qdisc_peek_dequeued, - .drop = cbq_drop, .init = cbq_init, .reset = cbq_reset, .destroy = cbq_destroy, diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 0a08c860eee4..3b6d5bd69101 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -115,7 +115,8 @@ static void choke_zap_tail_holes(struct choke_sched_data *q) } /* Drop packet from queue array by creating a "hole" */ -static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) +static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx, + struct sk_buff **to_free) { struct choke_sched_data *q = qdisc_priv(sch); struct sk_buff *skb = q->tab[idx]; @@ -129,7 +130,7 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) qdisc_qstats_backlog_dec(sch, skb); qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); - qdisc_drop(skb, sch); + qdisc_drop(skb, sch, to_free); --sch->q.qlen; } @@ -261,7 +262,8 @@ static bool choke_match_random(const struct choke_sched_data *q, return choke_match_flow(oskb, nskb); } -static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; struct choke_sched_data *q = qdisc_priv(sch); @@ -288,7 +290,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* Draw a packet at random from queue and compare flow */ if (choke_match_random(q, skb, &idx)) { q->stats.matched++; - choke_drop_by_idx(sch, idx); + choke_drop_by_idx(sch, idx, to_free); goto congestion_drop; } @@ -331,16 +333,16 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) } q->stats.pdrop++; - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); congestion_drop: - qdisc_drop(skb, sch); + qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; other_drop: if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return ret; } @@ -365,22 +367,6 @@ static struct sk_buff *choke_dequeue(struct Qdisc *sch) return skb; } -static unsigned int choke_drop(struct Qdisc *sch) -{ - struct choke_sched_data *q = qdisc_priv(sch); - unsigned int len; - - len = qdisc_queue_drop(sch); - if (len > 0) - q->stats.other++; - else { - if (!red_is_idling(&q->vars)) - red_start_of_idle_period(&q->vars); - } - - return len; -} - static void choke_reset(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); @@ -391,11 +377,11 @@ static void choke_reset(struct Qdisc *sch) q->head = (q->head + 1) & q->tab_mask; if (!skb) continue; - qdisc_qstats_backlog_dec(sch, skb); - --sch->q.qlen; - qdisc_drop(skb, sch); + rtnl_qdisc_drop(skb, sch); } + sch->q.qlen = 0; + sch->qstats.backlog = 0; memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); q->head = q->tail = 0; red_restart(&q->vars); @@ -471,7 +457,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); --sch->q.qlen; - qdisc_drop(skb, sch); + rtnl_qdisc_drop(skb, sch); } qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped); q->head = 0; @@ -569,7 +555,6 @@ static struct Qdisc_ops choke_qdisc_ops __read_mostly = { .enqueue = choke_enqueue, .dequeue = choke_dequeue, .peek = choke_peek_head, - .drop = choke_drop, .init = choke_init, .destroy = choke_destroy, .reset = choke_reset, diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index dddf3bb65a32..4002df3c7d9f 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -82,7 +82,8 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - qdisc_drop(skb, sch); + kfree_skb(skb); + qdisc_qstats_drop(sch); } static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) @@ -107,7 +108,8 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) return skb; } -static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct codel_sched_data *q; @@ -117,7 +119,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) } q = qdisc_priv(sch); q->drop_overlimit++; - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); } static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { @@ -174,7 +176,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); - qdisc_drop(skb, sch); + rtnl_qdisc_drop(skb, sch); } qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a63e879e8975..8af5c59eef84 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -91,7 +91,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) return err; @@ -119,7 +120,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { qdisc_destroy(cl->qdisc); @@ -279,7 +281,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (qlen) xstats.deficit = cl->deficit; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) return -1; @@ -347,7 +350,8 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, return NULL; } -static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; @@ -357,11 +361,11 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (cl == NULL) { if (err & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return err; } - err = qdisc_enqueue(skb, cl->qdisc); + err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; @@ -375,6 +379,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = cl->quantum; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return err; } @@ -407,6 +412,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -418,26 +424,6 @@ out: return NULL; } -static unsigned int drr_drop(struct Qdisc *sch) -{ - struct drr_sched *q = qdisc_priv(sch); - struct drr_class *cl; - unsigned int len; - - list_for_each_entry(cl, &q->active, alist) { - if (cl->qdisc->ops->drop) { - len = cl->qdisc->ops->drop(cl->qdisc); - if (len > 0) { - sch->q.qlen--; - if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); - return len; - } - } - } - return 0; -} - static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) { struct drr_sched *q = qdisc_priv(sch); @@ -463,6 +449,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } @@ -506,7 +493,6 @@ static struct Qdisc_ops drr_qdisc_ops __read_mostly = { .enqueue = drr_enqueue, .dequeue = drr_dequeue, .peek = qdisc_peek_dequeued, - .drop = drr_drop, .init = drr_init_qdisc, .reset = drr_reset_qdisc, .destroy = drr_destroy_qdisc, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 34b4ddaca27c..1308bbf460f7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -191,7 +191,8 @@ static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch, /* --------------------------- Qdisc operations ---------------------------- */ -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct dsmark_qdisc_data *p = qdisc_priv(sch); int err; @@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) #ifdef CONFIG_NET_CLS_ACT case TC_ACT_QUEUED: case TC_ACT_STOLEN: - kfree_skb(skb); + __qdisc_drop(skb, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: @@ -251,7 +252,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } - err = qdisc_enqueue(skb, p->q); + err = qdisc_enqueue(skb, p->q, to_free); if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); @@ -264,7 +265,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; drop: - qdisc_drop(skb, sch); + qdisc_drop(skb, sch, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } @@ -320,23 +321,6 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch) return p->q->ops->peek(p->q); } -static unsigned int dsmark_drop(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - unsigned int len; - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - if (p->q->ops->drop == NULL) - return 0; - - len = p->q->ops->drop(p->q); - if (len) - sch->q.qlen--; - - return len; -} - static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) { struct dsmark_qdisc_data *p = qdisc_priv(sch); @@ -489,7 +473,6 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { .enqueue = dsmark_enqueue, .dequeue = dsmark_dequeue, .peek = dsmark_peek, - .drop = dsmark_drop, .init = dsmark_init, .reset = dsmark_reset, .destroy = dsmark_destroy, diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 2177eac0a61e..baeed6a78d28 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -19,32 +19,39 @@ /* 1 band FIFO pseudo-"scheduler" */ -static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) return qdisc_enqueue_tail(skb, sch); - return qdisc_reshape_fail(skb, sch); + return qdisc_drop(skb, sch, to_free); } -static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); - return qdisc_reshape_fail(skb, sch); + return qdisc_drop(skb, sch, to_free); } -static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { + unsigned int prev_backlog; + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); + prev_backlog = sch->qstats.backlog; /* queue full, remove one skb to fulfill the limit */ - __qdisc_queue_drop_head(sch, &sch->q); + __qdisc_queue_drop_head(sch, &sch->q, to_free); qdisc_qstats_drop(sch); qdisc_enqueue_tail(skb, sch); + qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog); return NET_XMIT_CN; } @@ -99,7 +106,6 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, .change = fifo_init, @@ -114,7 +120,6 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, .change = fifo_init, @@ -129,7 +134,6 @@ struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { .enqueue = pfifo_tail_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .drop = qdisc_queue_drop_head, .init = fifo_init, .reset = qdisc_reset_queue, .change = fifo_init, diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 3c6a47d66a04..e5458b99e09c 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -368,18 +368,19 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) } } -static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct fq_sched_data *q = qdisc_priv(sch); struct fq_flow *f; if (unlikely(sch->q.qlen >= sch->limit)) - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); f = fq_classify(skb, q); if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) { q->stat_flows_plimit++; - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); } f->qlen++; @@ -445,8 +446,7 @@ begin: if (!head->first) { if (q->time_next_delayed_flow != ~0ULL) qdisc_watchdog_schedule_ns(&q->watchdog, - q->time_next_delayed_flow, - false); + q->time_next_delayed_flow); return NULL; } } @@ -515,17 +515,25 @@ out: return skb; } +static void fq_flow_purge(struct fq_flow *flow) +{ + rtnl_kfree_skbs(flow->head, flow->tail); + flow->head = NULL; + flow->qlen = 0; +} + static void fq_reset(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *root; - struct sk_buff *skb; struct rb_node *p; struct fq_flow *f; unsigned int idx; - while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) - kfree_skb(skb); + sch->q.qlen = 0; + sch->qstats.backlog = 0; + + fq_flow_purge(&q->internal); if (!q->fq_root) return; @@ -536,8 +544,7 @@ static void fq_reset(struct Qdisc *sch) f = container_of(p, struct fq_flow, fq_node); rb_erase(p, root); - while ((skb = fq_dequeue_head(sch, f)) != NULL) - kfree_skb(skb); + fq_flow_purge(f); kmem_cache_free(fq_flow_cachep, f); } @@ -738,7 +745,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (!skb) break; drop_len += qdisc_pkt_len(skb); - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); drop_count++; } qdisc_tree_reduce_backlog(sch, drop_count, drop_len); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6883a8971562..a5ea0e9b6be4 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -139,7 +139,8 @@ static inline void flow_queue_add(struct fq_codel_flow *flow, skb->next = NULL; } -static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets) +static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, + struct sk_buff **to_free) { struct fq_codel_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; @@ -171,8 +172,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets) do { skb = dequeue_head(flow); len += qdisc_pkt_len(skb); - mem += skb->truesize; - kfree_skb(skb); + mem += get_codel_cb(skb)->mem_usage; + __qdisc_drop(skb, to_free); } while (++i < max_packets && len < threshold); flow->dropped += i; @@ -184,28 +185,21 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets) return idx; } -static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch) -{ - unsigned int prev_backlog; - - prev_backlog = sch->qstats.backlog; - fq_codel_drop(sch, 1U); - return prev_backlog - sch->qstats.backlog; -} - -static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct fq_codel_sched_data *q = qdisc_priv(sch); unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; int uninitialized_var(ret); + unsigned int pkt_len; bool memory_limited; idx = fq_codel_classify(skb, sch, &ret); if (idx == 0) { if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return ret; } idx--; @@ -222,7 +216,8 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow->deficit = q->quantum; flow->dropped = 0; } - q->memory_usage += skb->truesize; + get_codel_cb(skb)->mem_usage = skb->truesize; + q->memory_usage += get_codel_cb(skb)->mem_usage; memory_limited = q->memory_usage > q->memory_limit; if (++sch->q.qlen <= sch->limit && !memory_limited) return NET_XMIT_SUCCESS; @@ -230,21 +225,32 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) prev_backlog = sch->qstats.backlog; prev_qlen = sch->q.qlen; + /* save this packet length as it might be dropped by fq_codel_drop() */ + pkt_len = qdisc_pkt_len(skb); /* fq_codel_drop() is quite expensive, as it performs a linear search * in q->backlogs[] to find a fat flow. * So instead of dropping a single packet, drop half of its backlog * with a 64 packets limit to not add a too big cpu spike here. */ - ret = fq_codel_drop(sch, q->drop_batch_size); + ret = fq_codel_drop(sch, q->drop_batch_size, to_free); - q->drop_overlimit += prev_qlen - sch->q.qlen; + prev_qlen -= sch->q.qlen; + prev_backlog -= sch->qstats.backlog; + q->drop_overlimit += prev_qlen; if (memory_limited) - q->drop_overmemory += prev_qlen - sch->q.qlen; - /* As we dropped packet(s), better let upper stack know this */ - qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen, - prev_backlog - sch->qstats.backlog); + q->drop_overmemory += prev_qlen; - return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS; + /* As we dropped packet(s), better let upper stack know this. + * If we dropped a packet for this flow, return NET_XMIT_CN, + * but in this case, our parents wont increase their backlogs. + */ + if (ret == idx) { + qdisc_tree_reduce_backlog(sch, prev_qlen - 1, + prev_backlog - pkt_len); + return NET_XMIT_CN; + } + qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog); + return NET_XMIT_SUCCESS; } /* This is the specific function called from codel_dequeue() @@ -262,7 +268,7 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) if (flow->head) { skb = dequeue_head(flow); q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb); - q->memory_usage -= skb->truesize; + q->memory_usage -= get_codel_cb(skb)->mem_usage; sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); } @@ -273,7 +279,8 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - qdisc_drop(skb, sch); + kfree_skb(skb); + qdisc_qstats_drop(sch); } static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) @@ -333,6 +340,12 @@ begin: return skb; } +static void fq_codel_flow_purge(struct fq_codel_flow *flow) +{ + rtnl_kfree_skbs(flow->head, flow->tail); + flow->head = NULL; +} + static void fq_codel_reset(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); @@ -343,18 +356,13 @@ static void fq_codel_reset(struct Qdisc *sch) for (i = 0; i < q->flows_cnt; i++) { struct fq_codel_flow *flow = q->flows + i; - while (flow->head) { - struct sk_buff *skb = dequeue_head(flow); - - qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); - } - + fq_codel_flow_purge(flow); INIT_LIST_HEAD(&flow->flowchain); codel_vars_init(&flow->cvars); } memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); sch->q.qlen = 0; + sch->qstats.backlog = 0; q->memory_usage = 0; } @@ -430,7 +438,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) struct sk_buff *skb = fq_codel_dequeue(sch); q->cstats.drop_len += qdisc_pkt_len(skb); - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); q->cstats.drop_count++; } qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); @@ -566,11 +574,13 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.qdisc_stats.memory_usage = q->memory_usage; st.qdisc_stats.drop_overmemory = q->drop_overmemory; + sch_tree_lock(sch); list_for_each(pos, &q->new_flows) st.qdisc_stats.new_flows_len++; list_for_each(pos, &q->old_flows) st.qdisc_stats.old_flows_len++; + sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); } @@ -624,7 +634,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (idx < q->flows_cnt) { const struct fq_codel_flow *flow = &q->flows[idx]; - const struct sk_buff *skb = flow->head; + const struct sk_buff *skb; memset(&xstats, 0, sizeof(xstats)); xstats.type = TCA_FQ_CODEL_XSTATS_CLASS; @@ -642,14 +652,19 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, codel_time_to_us(delta) : -codel_time_to_us(-delta); } - while (skb) { - qs.qlen++; - skb = skb->next; + if (flow->head) { + sch_tree_lock(sch); + skb = flow->head; + while (skb) { + qs.qlen++; + skb = skb->next; + } + sch_tree_unlock(sch); } qs.backlog = q->backlogs[idx]; qs.drops = flow->dropped; } - if (gnet_stats_copy_queue(d, NULL, &qs, 0) < 0) + if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; if (idx < q->flows_cnt) return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); @@ -697,7 +712,6 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { .enqueue = fq_codel_enqueue, .dequeue = fq_codel_dequeue, .peek = qdisc_peek_dequeued, - .drop = fq_codel_qdisc_drop, .init = fq_codel_init, .reset = fq_codel_reset, .destroy = fq_codel_destroy, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 269dd71b3828..e95b67cd5718 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -49,6 +49,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { q->gso_skb = skb; q->qstats.requeues++; + qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; /* it's still part of the queue */ __netif_schedule(q); @@ -76,6 +77,34 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, skb->next = NULL; } +/* This variant of try_bulk_dequeue_skb() makes sure + * all skbs in the chain are for the same txq + */ +static void try_bulk_dequeue_skb_slow(struct Qdisc *q, + struct sk_buff *skb, + int *packets) +{ + int mapping = skb_get_queue_mapping(skb); + struct sk_buff *nskb; + int cnt = 0; + + do { + nskb = q->dequeue(q); + if (!nskb) + break; + if (unlikely(skb_get_queue_mapping(nskb) != mapping)) { + q->skb_bad_txq = nskb; + qdisc_qstats_backlog_inc(q, nskb); + q->q.qlen++; + break; + } + skb->next = nskb; + skb = nskb; + } while (++cnt < 8); + (*packets) += cnt; + skb->next = NULL; +} + /* Note that dequeue_skb can possibly return a SKB list (via skb->next). * A requeued skb (via q->gso_skb) can also be a SKB list. */ @@ -86,31 +115,48 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, const struct netdev_queue *txq = q->dev_queue; *packets = 1; - *validate = true; if (unlikely(skb)) { + /* skb in gso_skb were already validated */ + *validate = false; /* check the reason of requeuing without tx lock first */ txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; + qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; } else skb = NULL; - /* skb in gso_skb were already validated */ - *validate = false; - } else { - if (!(q->flags & TCQ_F_ONETXQUEUE) || - !netif_xmit_frozen_or_stopped(txq)) { - skb = q->dequeue(q); - if (skb && qdisc_may_bulk(q)) - try_bulk_dequeue_skb(q, skb, txq, packets); + return skb; + } + *validate = true; + skb = q->skb_bad_txq; + if (unlikely(skb)) { + /* check the reason of requeuing without tx lock first */ + txq = skb_get_tx_queue(txq->dev, skb); + if (!netif_xmit_frozen_or_stopped(txq)) { + q->skb_bad_txq = NULL; + qdisc_qstats_backlog_dec(q, skb); + q->q.qlen--; + goto bulk; } + return NULL; + } + if (!(q->flags & TCQ_F_ONETXQUEUE) || + !netif_xmit_frozen_or_stopped(txq)) + skb = q->dequeue(q); + if (skb) { +bulk: + if (qdisc_may_bulk(q)) + try_bulk_dequeue_skb(q, skb, txq, packets); + else + try_bulk_dequeue_skb_slow(q, skb, packets); } return skb; } /* * Transmit possibly several skbs, and handle the return status as - * required. Holding the __QDISC___STATE_RUNNING bit guarantees that + * required. Owning running seqcount bit guarantees that * only one CPU can execute this function. * * Returns to the caller: @@ -163,7 +209,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, /* * NOTE: Called under qdisc_lock(q) with locally disabled BH. * - * __QDISC___STATE_RUNNING guarantees only one CPU can process + * running seqcount guarantees only one CPU can process * this qdisc at a time. qdisc_lock(q) serializes queue accesses for * this queue. * @@ -346,9 +392,10 @@ EXPORT_SYMBOL(netif_carrier_off); cheaper. */ -static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc) +static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, + struct sk_buff **to_free) { - kfree_skb(skb); + __qdisc_drop(skb, to_free); return NET_XMIT_CN; } @@ -379,6 +426,7 @@ struct Qdisc noop_qdisc = { .list = LIST_HEAD_INIT(noop_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, + .running = SEQCNT_ZERO(noop_qdisc.running), .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), }; EXPORT_SYMBOL(noop_qdisc); @@ -436,7 +484,8 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, return priv->q + band; } -static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc) +static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, + struct sk_buff **to_free) { if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; @@ -448,7 +497,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc) return __qdisc_enqueue_tail(skb, qdisc, list); } - return qdisc_drop(skb, qdisc); + return qdisc_drop(skb, qdisc, to_free); } static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) @@ -490,7 +539,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __qdisc_reset_queue(qdisc, band2list(priv, prio)); + __qdisc_reset_queue(band2list(priv, prio)); priv->bitmap = 0; qdisc->qstats.backlog = 0; @@ -537,6 +586,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { EXPORT_SYMBOL(pfifo_fast_ops); static struct lock_class_key qdisc_tx_busylock; +static struct lock_class_key qdisc_running_key; struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops) @@ -570,6 +620,10 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + seqcount_init(&sch->running); + lockdep_set_class(&sch->running, + dev->qdisc_running_key ?: &qdisc_running_key); + sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; @@ -614,11 +668,14 @@ void qdisc_reset(struct Qdisc *qdisc) if (ops->reset) ops->reset(qdisc); + kfree_skb(qdisc->skb_bad_txq); + qdisc->skb_bad_txq = NULL; + if (qdisc->gso_skb) { kfree_skb_list(qdisc->gso_skb); qdisc->gso_skb = NULL; - qdisc->q.qlen = 0; } + qdisc->q.qlen = 0; } EXPORT_SYMBOL(qdisc_reset); @@ -657,6 +714,7 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb_list(qdisc->gso_skb); + kfree_skb(qdisc->skb_bad_txq); /* * gen_estimator est_timer() might access qdisc->q.lock, * wait a RCU grace period before freeing qdisc. diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 80105109f756..c78a093c551a 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -149,7 +149,8 @@ static inline int gred_use_harddrop(struct gred_sched *t) return t->red_flags & TC_RED_HARDDROP; } -static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct gred_sched_data *q = NULL; struct gred_sched *t = qdisc_priv(sch); @@ -237,10 +238,10 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->stats.pdrop++; drop: - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); congestion_drop: - qdisc_drop(skb, sch); + qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; } @@ -276,40 +277,6 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch) return NULL; } -static unsigned int gred_drop(struct Qdisc *sch) -{ - struct sk_buff *skb; - struct gred_sched *t = qdisc_priv(sch); - - skb = qdisc_dequeue_tail(sch); - if (skb) { - unsigned int len = qdisc_pkt_len(skb); - struct gred_sched_data *q; - u16 dp = tc_index_to_dp(skb); - - if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { - net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x while dropping, screwing up backlog\n", - tc_index_to_dp(skb)); - } else { - q->backlog -= len; - q->stats.other++; - - if (gred_wred_mode(t)) { - if (!sch->qstats.backlog) - red_start_of_idle_period(&t->wred_set); - } else { - if (!q->backlog) - red_start_of_idle_period(&q->vars); - } - } - - qdisc_drop(skb, sch); - return len; - } - - return 0; -} - static void gred_reset(struct Qdisc *sch) { int i; @@ -623,7 +590,6 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .enqueue = gred_enqueue, .dequeue = gred_dequeue, .peek = qdisc_peek_head, - .drop = gred_drop, .init = gred_init, .reset = gred_reset, .destroy = gred_destroy, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d783d7cc3348..dff92ea772fe 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -130,7 +130,6 @@ struct hfsc_class { struct rb_node vt_node; /* parent's vt_tree member */ struct rb_root cf_tree; /* active children sorted by cl_f */ struct rb_node cf_node; /* parent's cf_heap member */ - struct list_head dlist; /* drop list member */ u64 cl_total; /* total work in bytes */ u64 cl_cumul; /* cumulative work in bytes done by @@ -177,8 +176,6 @@ struct hfsc_sched { struct hfsc_class root; /* root class */ struct Qdisc_class_hash clhash; /* class hash */ struct rb_root eligible; /* eligible tree */ - struct list_head droplist; /* active leaf class list (for - dropping) */ struct qdisc_watchdog watchdog; /* watchdog timer */ }; @@ -781,6 +778,20 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) else go_passive = 0; + /* update vt */ + cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + - cl->cl_vtoff + cl->cl_vtadj; + + /* + * if vt of the class is smaller than cvtmin, + * the class was skipped in the past due to non-fit. + * if so, we need to adjust vtadj. + */ + if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { + cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; + cl->cl_vt = cl->cl_parent->cl_cvtmin; + } + if (go_passive) { /* no more active child, going passive */ @@ -797,25 +808,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) continue; } - /* - * update vt and f - */ - cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - - cl->cl_vtoff + cl->cl_vtadj; - - /* - * if vt of the class is smaller than cvtmin, - * the class was skipped in the past due to non-fit. - * if so, we need to adjust vtadj. - */ - if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { - cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; - cl->cl_vt = cl->cl_parent->cl_cvtmin; - } - /* update the vt tree */ vttree_update(cl); + /* update f */ if (cl->cl_flags & HFSC_USC) { cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); @@ -858,7 +854,6 @@ set_active(struct hfsc_class *cl, unsigned int len) if (cl->cl_flags & HFSC_FSC) init_vf(cl, len); - list_add_tail(&cl->dlist, &cl->sched->droplist); } static void @@ -867,8 +862,6 @@ set_passive(struct hfsc_class *cl) if (cl->cl_flags & HFSC_RSC) eltree_remove(cl); - list_del(&cl->dlist); - /* * vttree is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from vttree. @@ -882,7 +875,7 @@ qdisc_peek_len(struct Qdisc *sch) unsigned int len; skb = sch->ops->peek(sch); - if (skb == NULL) { + if (unlikely(skb == NULL)) { qdisc_warn_nonwc("qdisc_peek_len", sch); return 0; } @@ -947,7 +940,7 @@ static void hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) { sc2isc(fsc, &cl->cl_fsc); - rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total); cl->cl_flags |= HFSC_FSC; } @@ -1015,11 +1008,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cur_time = psched_get_time(); if (tca[TCA_RATE]) { - spinlock_t *lock = qdisc_root_sleeping_lock(sch); - err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - lock, + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) return err; @@ -1068,7 +1060,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { kfree(cl); @@ -1373,7 +1366,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.work = cl->cl_total; xstats.rtwork = cl->cl_cumul; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) return -1; @@ -1443,7 +1436,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; q->eligible = RB_ROOT; - INIT_LIST_HEAD(&q->droplist); q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; @@ -1527,8 +1519,8 @@ hfsc_reset_qdisc(struct Qdisc *sch) hfsc_reset_class(cl); } q->eligible = RB_ROOT; - INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); + sch->qstats.backlog = 0; sch->q.qlen = 0; } @@ -1559,14 +1551,6 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) struct hfsc_sched *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; - struct hfsc_class *cl; - unsigned int i; - - sch->qstats.backlog = 0; - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) - sch->qstats.backlog += cl->qdisc->qstats.backlog; - } qopt.defcls = q->defcls; if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) @@ -1579,7 +1563,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) } static int -hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct hfsc_class *cl; int uninitialized_var(err); @@ -1588,11 +1572,11 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (cl == NULL) { if (err & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return err; } - err = qdisc_enqueue(skb, cl->qdisc); + err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; @@ -1601,9 +1585,19 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } - if (cl->qdisc->q.qlen == 1) + if (cl->qdisc->q.qlen == 1) { set_active(cl, qdisc_pkt_len(skb)); + /* + * If this is the first packet, isolate the head so an eventual + * head drop before the first dequeue operation has no chance + * to invalidate the deadline. + */ + if (cl->cl_flags & HFSC_RSC) + cl->qdisc->ops->peek(cl->qdisc); + } + + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1670,38 +1664,13 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } -static unsigned int -hfsc_drop(struct Qdisc *sch) -{ - struct hfsc_sched *q = qdisc_priv(sch); - struct hfsc_class *cl; - unsigned int len; - - list_for_each_entry(cl, &q->droplist, dlist) { - if (cl->qdisc->ops->drop != NULL && - (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) { - if (cl->qdisc->q.qlen == 0) { - update_vf(cl, 0, 0); - set_passive(cl); - } else { - list_move_tail(&cl->dlist, &q->droplist); - } - cl->qstats.drops++; - qdisc_qstats_drop(sch); - sch->q.qlen--; - return len; - } - } - return 0; -} - static const struct Qdisc_class_ops hfsc_class_ops = { .change = hfsc_change_class, .delete = hfsc_delete_class, @@ -1728,7 +1697,6 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .enqueue = hfsc_enqueue, .dequeue = hfsc_dequeue, .peek = qdisc_peek_dequeued, - .drop = hfsc_drop, .cl_ops = &hfsc_class_ops, .priv_size = sizeof(struct hfsc_sched), .owner = THIS_MODULE diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 13d6f83ec491..e3d0458af17b 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -345,7 +345,7 @@ static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb) skb->next = NULL; } -static unsigned int hhf_drop(struct Qdisc *sch) +static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free) { struct hhf_sched_data *q = qdisc_priv(sch); struct wdrr_bucket *bucket; @@ -359,25 +359,16 @@ static unsigned int hhf_drop(struct Qdisc *sch) struct sk_buff *skb = dequeue_head(bucket); sch->q.qlen--; - qdisc_qstats_drop(sch); qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); + qdisc_drop(skb, sch, to_free); } /* Return id of the bucket from which the packet was dropped. */ return bucket - q->buckets; } -static unsigned int hhf_qdisc_drop(struct Qdisc *sch) -{ - unsigned int prev_backlog; - - prev_backlog = sch->qstats.backlog; - hhf_drop(sch); - return prev_backlog - sch->qstats.backlog; -} - -static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct hhf_sched_data *q = qdisc_priv(sch); enum wdrr_bucket_idx idx; @@ -415,7 +406,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* Return Congestion Notification only if we dropped a packet from this * bucket. */ - if (hhf_drop(sch) == idx) + if (hhf_drop(sch, to_free) == idx) return NET_XMIT_CN; /* As we dropped a packet, better let upper stack know this. */ @@ -473,7 +464,7 @@ static void hhf_reset(struct Qdisc *sch) struct sk_buff *skb; while ((skb = hhf_dequeue(sch)) != NULL) - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } static void *hhf_zalloc(size_t sz) @@ -583,7 +574,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = hhf_dequeue(sch); - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, prev_backlog - sch->qstats.backlog); @@ -709,7 +700,6 @@ static struct Qdisc_ops hhf_qdisc_ops __read_mostly = { .enqueue = hhf_enqueue, .dequeue = hhf_dequeue, .peek = qdisc_peek_dequeued, - .drop = hhf_qdisc_drop, .init = hhf_init, .reset = hhf_reset, .destroy = hhf_destroy, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d4b4218af6b1..91982d9784b3 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -117,7 +117,6 @@ struct htb_class { * Written often fields */ struct gnet_stats_basic_packed bstats; - struct gnet_stats_queue qstats; struct tc_htb_xstats xstats; /* our special stats */ /* token bucket parameters */ @@ -140,6 +139,8 @@ struct htb_class { enum htb_cmode cmode; /* current mode of the class */ struct rb_node pq_node; /* node for event queue */ struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ + + unsigned int drops ____cacheline_aligned_in_smp; }; struct htb_level { @@ -569,7 +570,8 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) list_del_init(&cl->un.leaf.drop_list); } -static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { int uninitialized_var(ret); struct htb_sched *q = qdisc_priv(sch); @@ -581,19 +583,20 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) __skb_queue_tail(&q->direct_queue, skb); q->direct_pkts++; } else { - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); } #ifdef CONFIG_NET_CLS_ACT } else if (!cl) { if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return ret; #endif - } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { + } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q, + to_free)) != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) { qdisc_qstats_drop(sch); - cl->qstats.drops++; + cl->drops++; } return ret; } else { @@ -889,7 +892,6 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); - qdisc_unthrottled(sch); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; @@ -929,38 +931,13 @@ ok: } qdisc_qstats_overlimit(sch); if (likely(next_event > q->now)) - qdisc_watchdog_schedule_ns(&q->watchdog, next_event, true); + qdisc_watchdog_schedule_ns(&q->watchdog, next_event); else schedule_work(&q->work); fin: return skb; } -/* try to drop from each class (by prio) until one succeed */ -static unsigned int htb_drop(struct Qdisc *sch) -{ - struct htb_sched *q = qdisc_priv(sch); - int prio; - - for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { - struct list_head *p; - list_for_each(p, q->drops + prio) { - struct htb_class *cl = list_entry(p, struct htb_class, - un.leaf.drop_list); - unsigned int len; - if (cl->un.leaf.q->ops->drop && - (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { - sch->qstats.backlog -= len; - sch->q.qlen--; - if (!cl->un.leaf.q->q.qlen) - htb_deactivate(q, cl); - return len; - } - } - } - return 0; -} - /* reset all classes */ /* always caled under BH & queue lock */ static void htb_reset(struct Qdisc *sch) @@ -983,7 +960,7 @@ static void htb_reset(struct Qdisc *sch) } } qdisc_watchdog_cancel(&q->watchdog); - __skb_queue_purge(&q->direct_queue); + __qdisc_reset_queue(&q->direct_queue); sch->q.qlen = 0; sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); @@ -1007,7 +984,9 @@ static void htb_work_func(struct work_struct *work) struct htb_sched *q = container_of(work, struct htb_sched, work); struct Qdisc *sch = q->watchdog.qdisc; + rcu_read_lock(); __netif_schedule(qdisc_root(sch)); + rcu_read_unlock(); } static int htb_init(struct Qdisc *sch, struct nlattr *opt) @@ -1134,16 +1113,22 @@ static int htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; + struct gnet_stats_queue qs = { + .drops = cl->drops, + }; __u32 qlen = 0; - if (!cl->level && cl->un.leaf.q) + if (!cl->level && cl->un.leaf.q) { qlen = cl->un.leaf.q->q.qlen; + qs.backlog = cl->un.leaf.q->qstats.backlog; + } cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens); cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens); - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) + gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) return -1; return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); @@ -1256,7 +1241,7 @@ static void htb_destroy(struct Qdisc *sch) htb_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); - __skb_queue_purge(&q->direct_queue); + __qdisc_reset_queue(&q->direct_queue); } static int htb_delete(struct Qdisc *sch, unsigned long arg) @@ -1395,7 +1380,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (htb_rate_est || tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE] ? : &est.nla); if (err) { kfree(cl); @@ -1457,11 +1443,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, parent->children++; } else { if (tca[TCA_RATE]) { - spinlock_t *lock = qdisc_root_sleeping_lock(sch); - err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - lock, + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) return err; @@ -1599,7 +1584,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .enqueue = htb_enqueue, .dequeue = htb_dequeue, .peek = qdisc_peek_dequeued, - .drop = htb_drop, .init = htb_init, .reset = htb_reset, .destroy = htb_destroy, diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 10adbc617905..8fe6999b642a 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -27,6 +27,11 @@ static unsigned long ingress_get(struct Qdisc *sch, u32 classid) return TC_H_MIN(classid) + 1; } +static bool ingress_cl_offload(u32 classid) +{ + return true; +} + static unsigned long ingress_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -86,6 +91,7 @@ static const struct Qdisc_class_ops ingress_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = ingress_find_tcf, + .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, .unbind_tcf = ingress_put, }; @@ -110,6 +116,11 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid) } } +static bool clsact_cl_offload(u32 classid) +{ + return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS); +} + static unsigned long clsact_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -158,6 +169,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = clsact_find_tcf, + .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, .unbind_tcf = ingress_put, }; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 56a77b878eb3..b9439827c172 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -199,7 +199,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b8002ce3d010..549c66359924 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -342,7 +342,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, * hold here is the look on dev_queue->qdisc_sleeping * also acquired below. */ - spin_unlock_bh(d->lock); + if (d->lock) + spin_unlock_bh(d->lock); for (i = tc.offset; i < tc.offset + tc.count; i++) { struct netdev_queue *q = netdev_get_tx_queue(dev, i); @@ -359,15 +360,17 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_unlock_bh(qdisc_lock(qdisc)); } /* Reclaim root sleeping lock before completing stats */ - spin_lock_bh(d->lock); - if (gnet_stats_copy_basic(d, NULL, &bstats) < 0 || + if (d->lock) + spin_lock_bh(d->lock); + if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 || gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0) return -1; } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index bcdd54bb101c..9ffbb025b37e 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -65,7 +65,8 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) } static int -multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct Qdisc *qdisc; int ret; @@ -76,12 +77,12 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return ret; } #endif - ret = qdisc_enqueue(skb, qdisc); + ret = qdisc_enqueue(skb, qdisc, to_free); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -151,27 +152,6 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch) } -static unsigned int multiq_drop(struct Qdisc *sch) -{ - struct multiq_sched_data *q = qdisc_priv(sch); - int band; - unsigned int len; - struct Qdisc *qdisc; - - for (band = q->bands - 1; band >= 0; band--) { - qdisc = q->queues[band]; - if (qdisc->ops->drop) { - len = qdisc->ops->drop(qdisc); - if (len != 0) { - sch->q.qlen--; - return len; - } - } - } - return 0; -} - - static void multiq_reset(struct Qdisc *sch) { @@ -356,7 +336,8 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; - if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; @@ -415,7 +396,6 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { .enqueue = multiq_enqueue, .dequeue = multiq_dequeue, .peek = multiq_peek, - .drop = multiq_drop, .init = multiq_init, .reset = multiq_reset, .destroy = multiq_destroy, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 205bed00dd34..aaaf02175338 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -368,9 +368,7 @@ static void tfifo_reset(struct Qdisc *sch) struct sk_buff *skb = netem_rb_to_skb(p); rb_erase(p, &q->t_root); - skb->next = NULL; - skb->prev = NULL; - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } } @@ -399,7 +397,8 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) * when we statistically choose to corrupt one, we instead segment it, returning * the first packet to be corrupted, and re-enqueue the remaining frames */ -static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) +static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct sk_buff *segs; netdev_features_t features = netif_skb_features(skb); @@ -407,7 +406,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { - qdisc_reshape_fail(skb, sch); + qdisc_drop(skb, sch, to_free); return NULL; } consume_skb(skb); @@ -420,7 +419,8 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) * NET_XMIT_DROP: queue length didn't change. * NET_XMIT_SUCCESS: one skb was queued. */ -static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct netem_sched_data *q = qdisc_priv(sch); /* We don't fill cb now as skb_unshare() may invalidate it */ @@ -445,7 +445,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) } if (count == 0) { qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } @@ -465,7 +465,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; - rootq->enqueue(skb2, rootq); + rootq->enqueue(skb2, rootq, to_free); q->duplicate = dupsave; } @@ -477,7 +477,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (skb_is_gso(skb)) { - segs = netem_segment(skb, sch); + segs = netem_segment(skb, sch, to_free); if (!segs) return NET_XMIT_DROP; } else { @@ -487,10 +487,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb = segs; segs = segs->next; - if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || - (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) { - rc = qdisc_drop(skb, sch); + skb = skb_unshare(skb, GFP_ATOMIC); + if (unlikely(!skb)) { + qdisc_qstats_drop(sch); + goto finish_segs; + } + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) { + qdisc_drop(skb, sch, to_free); goto finish_segs; } @@ -499,7 +503,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) } if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) - return qdisc_reshape_fail(skb, sch); + return qdisc_drop(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); @@ -559,7 +563,7 @@ finish_segs: segs->next = NULL; qdisc_skb_cb(segs)->pkt_len = segs->len; last_len = segs->len; - rc = qdisc_enqueue(segs, sch); + rc = qdisc_enqueue(segs, sch, to_free); if (rc != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(rc)) qdisc_qstats_drop(sch); @@ -576,50 +580,17 @@ finish_segs: return NET_XMIT_SUCCESS; } -static unsigned int netem_drop(struct Qdisc *sch) -{ - struct netem_sched_data *q = qdisc_priv(sch); - unsigned int len; - - len = qdisc_queue_drop(sch); - - if (!len) { - struct rb_node *p = rb_first(&q->t_root); - - if (p) { - struct sk_buff *skb = netem_rb_to_skb(p); - - rb_erase(p, &q->t_root); - sch->q.qlen--; - skb->next = NULL; - skb->prev = NULL; - qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); - } - } - if (!len && q->qdisc && q->qdisc->ops->drop) - len = q->qdisc->ops->drop(q->qdisc); - if (len) - qdisc_qstats_drop(sch); - - return len; -} - static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; struct rb_node *p; - if (qdisc_is_throttled(sch)) - return NULL; - tfifo_dequeue: skb = __skb_dequeue(&sch->q); if (skb) { qdisc_qstats_backlog_dec(sch, skb); deliver: - qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); return skb; } @@ -650,14 +621,17 @@ deliver: #endif if (q->qdisc) { - int err = qdisc_enqueue(skb, q->qdisc); - - if (unlikely(err != NET_XMIT_SUCCESS)) { - if (net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - qdisc_pkt_len(skb)); - } + unsigned int pkt_len = qdisc_pkt_len(skb); + struct sk_buff *to_free = NULL; + int err; + + err = qdisc_enqueue(skb, q->qdisc, &to_free); + kfree_skb_list(to_free); + if (err != NET_XMIT_SUCCESS && + net_xmit_drop_count(err)) { + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, + pkt_len); } goto tfifo_dequeue; } @@ -1143,7 +1117,6 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .enqueue = netem_enqueue, .dequeue = netem_dequeue, .peek = qdisc_peek_dequeued, - .drop = netem_drop, .init = netem_init, .reset = netem_reset, .destroy = netem_destroy, diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 71ae3b9629f9..a570b0bb254c 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -134,7 +134,8 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size) return false; } -static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct pie_sched_data *q = qdisc_priv(sch); bool enqueue = false; @@ -166,7 +167,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) out: q->stats.dropped++; - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); } static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { @@ -234,7 +235,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); - qdisc_drop(skb, sch); + rtnl_qdisc_drop(skb, sch); } qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 5abfe44678d4..1c6cbab3e7b9 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -64,6 +64,8 @@ struct plug_sched_data { */ bool unplug_indefinite; + bool throttled; + /* Queue Limit in bytes */ u32 limit; @@ -86,7 +88,8 @@ struct plug_sched_data { u32 pkts_to_release; }; -static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct plug_sched_data *q = qdisc_priv(sch); @@ -96,14 +99,14 @@ static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch) return qdisc_enqueue_tail(skb, sch); } - return qdisc_reshape_fail(skb, sch); + return qdisc_drop(skb, sch, to_free); } static struct sk_buff *plug_dequeue(struct Qdisc *sch) { struct plug_sched_data *q = qdisc_priv(sch); - if (qdisc_is_throttled(sch)) + if (q->throttled) return NULL; if (!q->unplug_indefinite) { @@ -111,7 +114,7 @@ static struct sk_buff *plug_dequeue(struct Qdisc *sch) /* No more packets to dequeue. Block the queue * and wait for the next release command. */ - qdisc_throttled(sch); + q->throttled = true; return NULL; } q->pkts_to_release--; @@ -141,7 +144,7 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; } - qdisc_throttled(sch); + q->throttled = true; return 0; } @@ -173,7 +176,7 @@ static int plug_change(struct Qdisc *sch, struct nlattr *opt) q->pkts_last_epoch = q->pkts_current_epoch; q->pkts_current_epoch = 0; if (q->unplug_indefinite) - qdisc_throttled(sch); + q->throttled = true; q->unplug_indefinite = false; break; case TCQ_PLUG_RELEASE_ONE: @@ -182,7 +185,7 @@ static int plug_change(struct Qdisc *sch, struct nlattr *opt) */ q->pkts_to_release += q->pkts_last_epoch; q->pkts_last_epoch = 0; - qdisc_unthrottled(sch); + q->throttled = false; netif_schedule_queue(sch->dev_queue); break; case TCQ_PLUG_RELEASE_INDEFINITE: @@ -190,7 +193,7 @@ static int plug_change(struct Qdisc *sch, struct nlattr *opt) q->pkts_to_release = 0; q->pkts_last_epoch = 0; q->pkts_current_epoch = 0; - qdisc_unthrottled(sch); + q->throttled = false; netif_schedule_queue(sch->dev_queue); break; case TCQ_PLUG_LIMIT: diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fee1b15506b2..8f575899adfa 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -67,7 +67,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) } static int -prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) +prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct Qdisc *qdisc; int ret; @@ -83,8 +83,9 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) } #endif - ret = qdisc_enqueue(skb, qdisc); + ret = qdisc_enqueue(skb, qdisc, to_free); if (ret == NET_XMIT_SUCCESS) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -117,6 +118,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -125,24 +127,6 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) } -static unsigned int prio_drop(struct Qdisc *sch) -{ - struct prio_sched_data *q = qdisc_priv(sch); - int prio; - unsigned int len; - struct Qdisc *qdisc; - - for (prio = q->bands-1; prio >= 0; prio--) { - qdisc = q->queues[prio]; - if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) { - sch->q.qlen--; - return len; - } - } - return 0; -} - - static void prio_reset(struct Qdisc *sch) { @@ -151,6 +135,7 @@ prio_reset(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) qdisc_reset(q->queues[prio]); + sch->qstats.backlog = 0; sch->q.qlen = 0; } @@ -168,8 +153,9 @@ prio_destroy(struct Qdisc *sch) static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *queues[TCQ_PRIO_BANDS]; + int oldbands = q->bands, i; struct tc_prio_qopt *qopt; - int i; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; @@ -183,62 +169,42 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } + /* Before commit, make sure we can allocate all new qdiscs */ + for (i = oldbands; i < qopt->bands; i++) { + queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); + if (!queues[i]) { + while (i > oldbands) + qdisc_destroy(queues[--i]); + return -ENOMEM; + } + } + sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { + for (i = q->bands; i < oldbands; i++) { struct Qdisc *child = q->queues[i]; - q->queues[i] = &noop_qdisc; - if (child != &noop_qdisc) { - qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); - } - } - sch_tree_unlock(sch); - for (i = 0; i < q->bands; i++) { - if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child, *old; - - child = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); - if (child) { - sch_tree_lock(sch); - old = q->queues[i]; - q->queues[i] = child; - - if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); - qdisc_destroy(old); - } - sch_tree_unlock(sch); - } - } + qdisc_tree_reduce_backlog(child, child->q.qlen, + child->qstats.backlog); + qdisc_destroy(child); } + + for (i = oldbands; i < q->bands; i++) + q->queues[i] = queues[i]; + + sch_tree_unlock(sch); return 0; } static int prio_init(struct Qdisc *sch, struct nlattr *opt) { - struct prio_sched_data *q = qdisc_priv(sch); - int i; - - for (i = 0; i < TCQ_PRIO_BANDS; i++) - q->queues[i] = &noop_qdisc; - - if (opt == NULL) { + if (!opt) return -EINVAL; - } else { - int err; - if ((err = prio_tune(sch, opt)) != 0) - return err; - } - return 0; + return prio_tune(sch, opt); } static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -319,7 +285,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; - if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; @@ -378,7 +345,6 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .enqueue = prio_enqueue, .dequeue = prio_dequeue, .peek = prio_peek, - .drop = prio_drop, .init = prio_init, .reset = prio_reset, .destroy = prio_destroy, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d2d8d953432..f27ffee106f6 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -460,7 +460,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) return err; @@ -486,7 +487,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) goto destroy_class; @@ -663,7 +665,8 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.weight = cl->agg->class_weight; xstats.lmax = cl->agg->lmax; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) @@ -1214,7 +1217,8 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) return agg; } -static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; @@ -1235,11 +1239,13 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); err = qfq_change_agg(sch, cl, cl->agg->class_weight, qdisc_pkt_len(skb)); - if (err) - return err; + if (err) { + cl->qstats.drops++; + return qdisc_drop(skb, sch, to_free); + } } - err = qdisc_enqueue(skb, cl->qdisc); + err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); if (net_xmit_drop_count(err)) { @@ -1420,52 +1426,6 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) qfq_deactivate_class(q, cl); } -static unsigned int qfq_drop_from_slot(struct qfq_sched *q, - struct hlist_head *slot) -{ - struct qfq_aggregate *agg; - struct qfq_class *cl; - unsigned int len; - - hlist_for_each_entry(agg, slot, next) { - list_for_each_entry(cl, &agg->active, alist) { - - if (!cl->qdisc->ops->drop) - continue; - - len = cl->qdisc->ops->drop(cl->qdisc); - if (len > 0) { - if (cl->qdisc->q.qlen == 0) - qfq_deactivate_class(q, cl); - - return len; - } - } - } - return 0; -} - -static unsigned int qfq_drop(struct Qdisc *sch) -{ - struct qfq_sched *q = qdisc_priv(sch); - struct qfq_group *grp; - unsigned int i, j, len; - - for (i = 0; i <= QFQ_MAX_INDEX; i++) { - grp = &q->groups[i]; - for (j = 0; j < QFQ_MAX_SLOTS; j++) { - len = qfq_drop_from_slot(q, &grp->slots[j]); - if (len > 0) { - sch->q.qlen--; - return len; - } - } - - } - - return 0; -} - static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) { struct qfq_sched *q = qdisc_priv(sch); @@ -1560,7 +1520,6 @@ static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { .enqueue = qfq_enqueue, .dequeue = qfq_dequeue, .peek = qdisc_peek_dequeued, - .drop = qfq_drop, .init = qfq_init_qdisc, .reset = qfq_reset_qdisc, .destroy = qfq_destroy_qdisc, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 8c0508c0e287..249b2a18acbd 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -56,7 +56,8 @@ static inline int red_use_harddrop(struct red_sched_data *q) return q->flags & TC_RED_HARDDROP; } -static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; @@ -95,8 +96,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) break; } - ret = qdisc_enqueue(skb, child); + ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -105,7 +107,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; congestion_drop: - qdisc_drop(skb, sch); + qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; } @@ -118,6 +120,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) skb = child->dequeue(child); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { if (!red_is_idling(&q->vars)) @@ -134,30 +137,12 @@ static struct sk_buff *red_peek(struct Qdisc *sch) return child->ops->peek(child); } -static unsigned int red_drop(struct Qdisc *sch) -{ - struct red_sched_data *q = qdisc_priv(sch); - struct Qdisc *child = q->qdisc; - unsigned int len; - - if (child->ops->drop && (len = child->ops->drop(child)) > 0) { - q->stats.other++; - qdisc_qstats_drop(sch); - sch->q.qlen--; - return len; - } - - if (!red_is_idling(&q->vars)) - red_start_of_idle_period(&q->vars); - - return 0; -} - static void red_reset(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; red_restart(&q->vars); } @@ -361,7 +346,6 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = { .enqueue = red_enqueue, .dequeue = red_dequeue, .peek = red_peek, - .drop = red_drop, .init = red_init, .reset = red_reset, .destroy = red_destroy, diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index c69611640fa5..add3cc7d37ec 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -275,7 +275,8 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, return false; } -static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct sfb_sched_data *q = qdisc_priv(sch); @@ -397,7 +398,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } enqueue: - ret = qdisc_enqueue(skb, child); + ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { sch->q.qlen++; increment_qlen(skb, q); @@ -408,7 +409,7 @@ enqueue: return ret; drop: - qdisc_drop(skb, sch); + qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; other_drop: if (ret & __NET_XMIT_BYPASS) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 498f0a2cb47f..7f195ed4d568 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -343,7 +343,7 @@ static int sfq_headdrop(const struct sfq_sched_data *q) } static int -sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash, dropped; @@ -367,7 +367,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (x == SFQ_EMPTY_SLOT) { x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); q->ht[hash] = x; slot = &q->slots[x]; slot->hash = hash; @@ -424,14 +424,14 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (slot->qlen >= q->maxdepth) { congestion_drop: if (!sfq_headdrop(q)) - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); /* We know we have at least one packet in queue */ head = slot_dequeue_head(slot); delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); sch->qstats.backlog -= delta; slot->backlog -= delta; - qdisc_drop(head, sch); + qdisc_drop(head, sch, to_free); slot_queue_add(slot, skb); return NET_XMIT_CN; @@ -520,7 +520,7 @@ sfq_reset(struct Qdisc *sch) struct sk_buff *skb; while ((skb = sfq_dequeue(sch)) != NULL) - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } /* @@ -896,7 +896,6 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, .peek = qdisc_peek_dequeued, - .drop = sfq_drop, .init = sfq_init, .reset = sfq_reset, .destroy = sfq_destroy, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 83b90b584fae..303355c449ab 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -155,7 +155,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ -static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) +static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *segs, *nskb; @@ -166,7 +167,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) - return qdisc_reshape_fail(skb, sch); + return qdisc_drop(skb, sch, to_free); nb = 0; while (segs) { @@ -174,7 +175,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) segs->next = NULL; qdisc_skb_cb(segs)->pkt_len = segs->len; len += segs->len; - ret = qdisc_enqueue(segs, q->qdisc); + ret = qdisc_enqueue(segs, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); @@ -190,39 +191,29 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; } -static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); int ret; if (qdisc_pkt_len(skb) > q->max_size) { if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size) - return tbf_segment(skb, sch); - return qdisc_reshape_fail(skb, sch); + return tbf_segment(skb, sch, to_free); + return qdisc_drop(skb, sch, to_free); } - ret = qdisc_enqueue(skb, q->qdisc); + ret = qdisc_enqueue(skb, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); return ret; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } -static unsigned int tbf_drop(struct Qdisc *sch) -{ - struct tbf_sched_data *q = qdisc_priv(sch); - unsigned int len = 0; - - if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { - sch->q.qlen--; - qdisc_qstats_drop(sch); - } - return len; -} - static bool tbf_peak_present(const struct tbf_sched_data *q) { return q->peak.rate_bytes_ps; @@ -263,15 +254,14 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) q->t_c = now; q->tokens = toks; q->ptokens = ptoks; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; - qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); return skb; } qdisc_watchdog_schedule_ns(&q->watchdog, - now + max_t(long, -toks, -ptoks), - true); + now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, @@ -294,6 +284,7 @@ static void tbf_reset(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->t_c = ktime_get_ns(); q->tokens = q->buffer; @@ -555,7 +546,6 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, .peek = qdisc_peek_dequeued, - .drop = tbf_drop, .init = tbf_init, .reset = tbf_reset, .destroy = tbf_destroy, diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index e02687185a59..2cd9b4478b92 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -77,7 +77,7 @@ struct teql_sched_data { /* "teql*" qdisc routines */ static int -teql_enqueue(struct sk_buff *skb, struct Qdisc *sch) +teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); @@ -87,7 +87,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); } static struct sk_buff * |