diff options
Diffstat (limited to 'net')
36 files changed, 2113 insertions, 307 deletions
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e14767500ea..00573cc46c98 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -915,7 +915,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, u16 nlh_flags) + const unsigned char *addr, u16 vid, u16 nlh_flags, + struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index eabf8bf28a3f..00deef7fc1f3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -573,7 +573,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, u16 nlh_flags); + const unsigned char *addr, u16 vid, u16 nlh_flags, + struct netlink_ext_ack *extack); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 035ff59d9cbd..4d2b9eb7604a 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -113,7 +113,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, info.added_by_user = added_by_user; info.offloaded = offloaded; notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE; - call_switchdev_notifiers(notifier_type, dev, &info.info); + call_switchdev_notifiers(notifier_type, dev, &info.info, NULL); } void diff --git a/net/core/devlink.c b/net/core/devlink.c index abb0da9d7b4b..60248a53c0ad 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3597,6 +3597,1012 @@ out: return 0; } +#define DEVLINK_HEALTH_BUFFER_SIZE (4096 - GENL_HDRLEN) +#define DEVLINK_HEALTH_BUFFER_DATA_SIZE (DEVLINK_HEALTH_BUFFER_SIZE / 2) +#define DEVLINK_HEALTH_SIZE_TO_BUFFERS(size) DIV_ROUND_UP(size, DEVLINK_HEALTH_BUFFER_DATA_SIZE) +#define DEVLINK_HEALTH_BUFFER_MAX_CHUNK 1024 + +struct devlink_health_buffer { + void *data; + u64 offset; + u64 bytes_left; + u64 bytes_left_metadata; + u64 max_nested_depth; + u64 curr_nest; +}; + +struct devlink_health_buffer_desc { + int attrtype; + u16 len; + u8 nla_type; + u8 nest_end; + int value[0]; +}; + +static void +devlink_health_buffers_reset(struct devlink_health_buffer **buffers_list, + u64 num_of_buffers) +{ + u64 i; + + for (i = 0; i < num_of_buffers; i++) { + memset(buffers_list[i]->data, 0, DEVLINK_HEALTH_BUFFER_SIZE); + buffers_list[i]->offset = 0; + buffers_list[i]->bytes_left = DEVLINK_HEALTH_BUFFER_DATA_SIZE; + buffers_list[i]->bytes_left_metadata = + DEVLINK_HEALTH_BUFFER_DATA_SIZE; + buffers_list[i]->max_nested_depth = 0; + buffers_list[i]->curr_nest = 0; + } +} + +static void +devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list, + u64 size); + +static struct devlink_health_buffer ** +devlink_health_buffers_create(u64 size) +{ + struct devlink_health_buffer **buffers_list; + u64 num_of_buffers = DEVLINK_HEALTH_SIZE_TO_BUFFERS(size); + u64 i; + + buffers_list = kcalloc(num_of_buffers, + sizeof(struct devlink_health_buffer *), + GFP_KERNEL); + if (!buffers_list) + return NULL; + + for (i = 0; i < num_of_buffers; i++) { + struct devlink_health_buffer *buffer; + void *data; + + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + data = kzalloc(DEVLINK_HEALTH_BUFFER_SIZE, GFP_KERNEL); + if (!buffer || !data) { + kfree(buffer); + kfree(data); + goto buffers_cleanup; + } + buffers_list[i] = buffer; + buffer->data = data; + } + devlink_health_buffers_reset(buffers_list, num_of_buffers); + + return buffers_list; + +buffers_cleanup: + devlink_health_buffers_destroy(buffers_list, --i); + kfree(buffers_list); + return NULL; +} + +static void +devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list, + u64 num_of_buffers) +{ + u64 i; + + for (i = 0; i < num_of_buffers; i++) { + kfree(buffers_list[i]->data); + kfree(buffers_list[i]); + } +} + +void +devlink_health_buffer_offset_inc(struct devlink_health_buffer *buffer, + int len) +{ + buffer->offset += len; +} + +/* In order to store a nest, need two descriptors, for start and end */ +#define DEVLINK_HEALTH_BUFFER_NEST_SIZE (sizeof(struct devlink_health_buffer_desc) * 2) + +int devlink_health_buffer_verify_len(struct devlink_health_buffer *buffer, + int len, int metadata_len) +{ + if (len > DEVLINK_HEALTH_BUFFER_DATA_SIZE) + return -EINVAL; + + if (buffer->bytes_left < len || + buffer->bytes_left_metadata < metadata_len) + return -ENOMEM; + + return 0; +} + +static struct devlink_health_buffer_desc * +devlink_health_buffer_get_desc_from_offset(struct devlink_health_buffer *buffer) +{ + return buffer->data + buffer->offset; +} + +int +devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, + int attrtype) +{ + struct devlink_health_buffer_desc *desc; + int err; + + err = devlink_health_buffer_verify_len(buffer, 0, + DEVLINK_HEALTH_BUFFER_NEST_SIZE); + if (err) + return err; + + if (attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT && + attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR && + attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE && + attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY) + return -EINVAL; + + desc = devlink_health_buffer_get_desc_from_offset(buffer); + + desc->attrtype = attrtype; + buffer->bytes_left_metadata -= DEVLINK_HEALTH_BUFFER_NEST_SIZE; + devlink_health_buffer_offset_inc(buffer, sizeof(*desc)); + + buffer->curr_nest++; + buffer->max_nested_depth = max(buffer->max_nested_depth, + buffer->curr_nest); + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_start); + +enum devlink_health_buffer_nest_end_cancel { + DEVLINK_HEALTH_BUFFER_NEST_END = 1, + DEVLINK_HEALTH_BUFFER_NEST_CANCEL, +}; + +static void +devlink_health_buffer_nest_end_cancel(struct devlink_health_buffer *buffer, + enum devlink_health_buffer_nest_end_cancel nest) +{ + struct devlink_health_buffer_desc *desc; + + WARN_ON(!buffer->curr_nest); + buffer->curr_nest--; + + desc = devlink_health_buffer_get_desc_from_offset(buffer); + desc->nest_end = nest; + devlink_health_buffer_offset_inc(buffer, sizeof(*desc)); +} + +void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer) +{ + devlink_health_buffer_nest_end_cancel(buffer, + DEVLINK_HEALTH_BUFFER_NEST_END); +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_end); + +void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer) +{ + devlink_health_buffer_nest_end_cancel(buffer, + DEVLINK_HEALTH_BUFFER_NEST_CANCEL); +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_cancel); + +int +devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer, + char *name) +{ + struct devlink_health_buffer_desc *desc; + int err; + + err = devlink_health_buffer_verify_len(buffer, strlen(name) + 1, + sizeof(*desc)); + if (err) + return err; + + desc = devlink_health_buffer_get_desc_from_offset(buffer); + desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME; + desc->nla_type = NLA_NUL_STRING; + desc->len = strlen(name) + 1; + memcpy(&desc->value, name, desc->len); + devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len); + + buffer->bytes_left_metadata -= sizeof(*desc); + buffer->bytes_left -= (strlen(name) + 1); + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_object_name); + +static int +devlink_health_buffer_put_value(struct devlink_health_buffer *buffer, + u8 nla_type, void *value, int len) +{ + struct devlink_health_buffer_desc *desc; + int err; + + err = devlink_health_buffer_verify_len(buffer, len, sizeof(*desc)); + if (err) + return err; + + desc = devlink_health_buffer_get_desc_from_offset(buffer); + desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA; + desc->nla_type = nla_type; + desc->len = len; + memcpy(&desc->value, value, len); + devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len); + + buffer->bytes_left_metadata -= sizeof(*desc); + buffer->bytes_left -= len; + + return 0; +} + +int +devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer, + u8 value) +{ + int err; + + err = devlink_health_buffer_put_value(buffer, NLA_U8, &value, + sizeof(value)); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u8); + +int +devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer, + u32 value) +{ + int err; + + err = devlink_health_buffer_put_value(buffer, NLA_U32, &value, + sizeof(value)); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u32); + +int +devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer, + u64 value) +{ + int err; + + err = devlink_health_buffer_put_value(buffer, NLA_U64, &value, + sizeof(value)); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u64); + +int +devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer, + char *name) +{ + int err; + + if (strlen(name) + 1 > DEVLINK_HEALTH_BUFFER_MAX_CHUNK) + return -EINVAL; + + err = devlink_health_buffer_put_value(buffer, NLA_NUL_STRING, name, + strlen(name) + 1); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_string); + +int +devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer, + void *data, int len) +{ + int err; + + if (len > DEVLINK_HEALTH_BUFFER_MAX_CHUNK) + return -EINVAL; + + err = devlink_health_buffer_put_value(buffer, NLA_BINARY, data, len); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_data); + +static int +devlink_health_buffer_fill_data(struct sk_buff *skb, + struct devlink_health_buffer_desc *desc) +{ + int err = -EINVAL; + + switch (desc->nla_type) { + case NLA_U8: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + *(u8 *)desc->value); + break; + case NLA_U32: + err = nla_put_u32(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + *(u32 *)desc->value); + break; + case NLA_U64: + err = nla_put_u64_64bit(skb, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + *(u64 *)desc->value, DEVLINK_ATTR_PAD); + break; + case NLA_NUL_STRING: + err = nla_put_string(skb, + DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + (char *)&desc->value); + break; + case NLA_BINARY: + err = nla_put(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, + desc->len, (void *)&desc->value); + break; + } + + return err; +} + +static int +devlink_health_buffer_fill_type(struct sk_buff *skb, + struct devlink_health_buffer_desc *desc) +{ + int err = -EINVAL; + + switch (desc->nla_type) { + case NLA_U8: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_U8); + break; + case NLA_U32: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_U32); + break; + case NLA_U64: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_U64); + break; + case NLA_NUL_STRING: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_NUL_STRING); + break; + case NLA_BINARY: + err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, + NLA_BINARY); + break; + } + + return err; +} + +static inline struct devlink_health_buffer_desc * +devlink_health_buffer_get_next_desc(struct devlink_health_buffer_desc *desc) +{ + return (void *)&desc->value + desc->len; +} + +static int +devlink_health_buffer_prepare_skb(struct sk_buff *skb, + struct devlink_health_buffer *buffer) +{ + struct devlink_health_buffer_desc *last_desc, *desc; + struct nlattr **buffer_nlattr; + int err; + int i = 0; + + buffer_nlattr = kcalloc(buffer->max_nested_depth, + sizeof(*buffer_nlattr), GFP_KERNEL); + if (!buffer_nlattr) + return -EINVAL; + + last_desc = devlink_health_buffer_get_desc_from_offset(buffer); + desc = buffer->data; + while (desc != last_desc) { + switch (desc->attrtype) { + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT: + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR: + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE: + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY: + buffer_nlattr[i] = nla_nest_start(skb, desc->attrtype); + if (!buffer_nlattr[i]) + goto nla_put_failure; + i++; + break; + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA: + err = devlink_health_buffer_fill_data(skb, desc); + if (err) + goto nla_put_failure; + err = devlink_health_buffer_fill_type(skb, desc); + if (err) + goto nla_put_failure; + break; + case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME: + err = nla_put_string(skb, desc->attrtype, + (char *)&desc->value); + if (err) + goto nla_put_failure; + break; + default: + WARN_ON(!desc->nest_end); + WARN_ON(i <= 0); + if (desc->nest_end == DEVLINK_HEALTH_BUFFER_NEST_END) + nla_nest_end(skb, buffer_nlattr[--i]); + else + nla_nest_cancel(skb, buffer_nlattr[--i]); + break; + } + desc = devlink_health_buffer_get_next_desc(desc); + } + + return 0; + +nla_put_failure: + kfree(buffer_nlattr); + return err; +} + +static int +devlink_health_buffer_snd(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_health_buffer **buffers_array, + u64 num_of_buffers) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + void *hdr; + int err; + u64 i; + + for (i = 0; i < num_of_buffers; i++) { + /* Skip buffer if driver did not fill it up with any data */ + if (!buffers_array[i]->offset) + continue; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) + goto nla_put_failure; + + err = devlink_health_buffer_prepare_skb(skb, buffers_array[i]); + if (err) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + err = genlmsg_reply(skb, info); + if (err) + return err; + } + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + err = genlmsg_reply(skb, info); + if (err) + return err; + + return 0; + +nla_put_failure: + err = -EIO; + nlmsg_free(skb); + return err; +} + +struct devlink_health_reporter { + struct list_head list; + struct devlink_health_buffer **dump_buffers_array; + struct mutex dump_lock; /* lock parallel read/write from dump buffers */ + struct devlink_health_buffer **diagnose_buffers_array; + struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */ + void *priv; + const struct devlink_health_reporter_ops *ops; + struct devlink *devlink; + u64 graceful_period; + bool auto_recover; + u8 health_state; + u8 dump_avail; + u64 dump_ts; + u64 error_count; + u64 recovery_count; + u64 last_recovery_ts; +}; + +enum devlink_health_reporter_state { + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY, + DEVLINK_HEALTH_REPORTER_STATE_ERROR, +}; + +void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter) +{ + return reporter->priv; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); + +static struct devlink_health_reporter * +devlink_health_reporter_find_by_name(struct devlink *devlink, + const char *reporter_name) +{ + struct devlink_health_reporter *reporter; + + list_for_each_entry(reporter, &devlink->reporter_list, list) + if (!strcmp(reporter->ops->name, reporter_name)) + return reporter; + return NULL; +} + +/** + * devlink_health_reporter_create - create devlink health reporter + * + * @devlink: devlink + * @ops: ops + * @graceful_period: to avoid recovery loops, in msecs + * @auto_recover: auto recover when error occurs + * @priv: priv + */ +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv) +{ + struct devlink_health_reporter *reporter; + + mutex_lock(&devlink->lock); + if (devlink_health_reporter_find_by_name(devlink, ops->name)) { + reporter = ERR_PTR(-EEXIST); + goto unlock; + } + + if (WARN_ON(ops->dump && !ops->dump_size) || + WARN_ON(ops->diagnose && !ops->diagnose_size) || + WARN_ON(auto_recover && !ops->recover) || + WARN_ON(graceful_period && !ops->recover)) { + reporter = ERR_PTR(-EINVAL); + goto unlock; + } + + reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); + if (!reporter) { + reporter = ERR_PTR(-ENOMEM); + goto unlock; + } + + if (ops->dump) { + reporter->dump_buffers_array = + devlink_health_buffers_create(ops->dump_size); + if (!reporter->dump_buffers_array) { + kfree(reporter); + reporter = ERR_PTR(-ENOMEM); + goto unlock; + } + } + + if (ops->diagnose) { + reporter->diagnose_buffers_array = + devlink_health_buffers_create(ops->diagnose_size); + if (!reporter->diagnose_buffers_array) { + devlink_health_buffers_destroy(reporter->dump_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size)); + kfree(reporter); + reporter = ERR_PTR(-ENOMEM); + goto unlock; + } + } + + list_add_tail(&reporter->list, &devlink->reporter_list); + mutex_init(&reporter->dump_lock); + mutex_init(&reporter->diagnose_lock); + + reporter->priv = priv; + reporter->ops = ops; + reporter->devlink = devlink; + reporter->graceful_period = graceful_period; + reporter->auto_recover = auto_recover; +unlock: + mutex_unlock(&devlink->lock); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_create); + +/** + * devlink_health_reporter_destroy - destroy devlink health reporter + * + * @reporter: devlink health reporter to destroy + */ +void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + mutex_lock(&reporter->devlink->lock); + list_del(&reporter->list); + devlink_health_buffers_destroy(reporter->dump_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); + devlink_health_buffers_destroy(reporter->diagnose_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size)); + kfree(reporter); + mutex_unlock(&reporter->devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); + +static int +devlink_health_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + int err; + + if (!reporter->ops->recover) + return -EOPNOTSUPP; + + err = reporter->ops->recover(reporter, priv_ctx); + if (err) + return err; + + reporter->recovery_count++; + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; + reporter->last_recovery_ts = jiffies; + + return 0; +} + +static int devlink_health_do_dump(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + int err; + + if (!reporter->ops->dump) + return 0; + + if (reporter->dump_avail) + return 0; + + devlink_health_buffers_reset(reporter->dump_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); + err = reporter->ops->dump(reporter, reporter->dump_buffers_array, + DEVLINK_HEALTH_BUFFER_SIZE, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size), + priv_ctx); + if (!err) { + reporter->dump_avail = true; + reporter->dump_ts = jiffies; + } + + return err; +} + +int devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + struct devlink *devlink = reporter->devlink; + int err = 0; + + /* write a log message of the current error */ + WARN_ON(!msg); + trace_devlink_health_report(devlink, reporter->ops->name, msg); + reporter->error_count++; + + /* abort if the previous error wasn't recovered */ + if (reporter->auto_recover && + (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || + jiffies - reporter->last_recovery_ts < + msecs_to_jiffies(reporter->graceful_period))) { + trace_devlink_health_recover_aborted(devlink, + reporter->ops->name, + reporter->health_state, + jiffies - + reporter->last_recovery_ts); + return -ECANCELED; + } + + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; + + mutex_lock(&reporter->dump_lock); + /* store current dump of current error, for later analysis */ + devlink_health_do_dump(reporter, priv_ctx); + mutex_unlock(&reporter->dump_lock); + + if (reporter->auto_recover) + err = devlink_health_reporter_recover(reporter, priv_ctx); + + return err; +} +EXPORT_SYMBOL_GPL(devlink_health_report); + +static struct devlink_health_reporter * +devlink_health_reporter_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + char *reporter_name; + + if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) + return NULL; + + reporter_name = + nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); + return devlink_health_reporter_find_by_name(devlink, reporter_name); +} + +static int +devlink_nl_health_reporter_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_health_reporter *reporter, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) +{ + struct nlattr *reporter_attr; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER); + if (!reporter_attr) + goto genlmsg_cancel; + if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, + reporter->ops->name)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, + reporter->health_state)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR, + reporter->error_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, + reporter->recovery_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + reporter->graceful_period, + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, + reporter->auto_recover)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL, + reporter->dump_avail)) + goto reporter_nest_cancel; + if (reporter->dump_avail && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, + jiffies_to_msecs(reporter->dump_ts), + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + + nla_nest_end(msg, reporter_attr); + genlmsg_end(msg, hdr); + return 0; + +reporter_nest_cancel: + nla_nest_end(msg, reporter_attr); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + struct sk_buff *msg; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + info->snd_portid, info->snd_seq, + 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int +devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_health_reporter *reporter; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(reporter, &devlink->reporter_list, + list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, devlink, + reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int +devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->recover && + (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) + return -EINVAL; + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) + reporter->graceful_period = + nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]); + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]) + reporter->auto_recover = + nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); + + return 0; +} + +static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + return devlink_health_reporter_recover(reporter, NULL); +} + +static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + u64 num_of_buffers; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->diagnose) + return -EOPNOTSUPP; + + num_of_buffers = + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size); + + mutex_lock(&reporter->diagnose_lock); + devlink_health_buffers_reset(reporter->diagnose_buffers_array, + num_of_buffers); + + err = reporter->ops->diagnose(reporter, + reporter->diagnose_buffers_array, + DEVLINK_HEALTH_BUFFER_SIZE, + num_of_buffers); + if (err) + goto out; + + err = devlink_health_buffer_snd(info, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + 0, reporter->diagnose_buffers_array, + num_of_buffers); + if (err) + goto out; + + mutex_unlock(&reporter->diagnose_lock); + return 0; + +out: + mutex_unlock(&reporter->diagnose_lock); + return err; +} + +static void +devlink_health_dump_clear(struct devlink_health_reporter *reporter) +{ + reporter->dump_avail = false; + reporter->dump_ts = 0; + devlink_health_buffers_reset(reporter->dump_buffers_array, + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); +} + +static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + u64 num_of_buffers; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->dump) + return -EOPNOTSUPP; + + num_of_buffers = + DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size); + + mutex_lock(&reporter->dump_lock); + err = devlink_health_do_dump(reporter, NULL); + if (err) + goto out; + + err = devlink_health_buffer_snd(info, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + 0, reporter->dump_buffers_array, + num_of_buffers); + +out: + mutex_unlock(&reporter->dump_lock); + return err; +} + +static int +devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + mutex_lock(&reporter->dump_lock); + devlink_health_dump_clear(reporter); + mutex_unlock(&reporter->dump_lock); + return 0; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -3622,6 +4628,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -3842,6 +4851,51 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, + .doit = devlink_nl_cmd_health_reporter_get_doit, + .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, + .doit = devlink_nl_cmd_health_reporter_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + .doit = devlink_nl_cmd_health_reporter_recover_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + .doit = devlink_nl_cmd_health_reporter_diagnose_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + .doit = devlink_nl_cmd_health_reporter_dump_get_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -3882,6 +4936,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->resource_list); INIT_LIST_HEAD(&devlink->param_list); INIT_LIST_HEAD(&devlink->region_list); + INIT_LIST_HEAD(&devlink->reporter_list); mutex_init(&devlink->lock); return devlink; } diff --git a/net/core/dst.c b/net/core/dst.c index 81ccf20e2826..a263309df115 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -98,8 +98,12 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, struct dst_entry *dst; if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { - if (ops->gc(ops)) + if (ops->gc(ops)) { + printk_ratelimited(KERN_NOTICE "Route cache is full: " + "consider increasing sysctl " + "net.ipv[4|6].route.max_size.\n"); return NULL; + } } dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b02fb19df2cc..17f36317363d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -778,6 +778,41 @@ nla_put_failure: return -EMSGSIZE; } +static int rtnl_net_valid_getid_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETNSA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETNSA_PID: + case NETNSA_FD: + case NETNSA_NSID: + case NETNSA_TARGET_NSID: + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request"); + return -EINVAL; + } + } + + return 0; +} + static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -793,8 +828,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *msg; int err; - err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + err = rtnl_net_valid_getid_req(skb, nlh, tb, extack); if (err < 0) return err; if (tb[NETNSA_PID]) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5ea1bed08ede..f5a98082ac7a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3242,6 +3242,53 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, return ret; } +static int rtnl_valid_getlink_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifinfomsg *ifm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG(extack, "Invalid header for get link"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, + extack); + + ifm = nlmsg_data(nlh); + if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || + ifm->ifi_change) { + NL_SET_ERR_MSG(extack, "Invalid values in header for get link request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, + extack); + if (err) + return err; + + for (i = 0; i <= IFLA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFLA_IFNAME: + case IFLA_EXT_MASK: + case IFLA_TARGET_NETNSID: + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported attribute in get link request"); + return -EINVAL; + } + } + + return 0; +} + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -3256,7 +3303,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u32 ext_filter_mask = 0; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + err = rtnl_valid_getlink_req(skb, nlh, tb, extack); if (err < 0) return err; @@ -3639,7 +3686,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, const struct net_device_ops *ops = br_dev->netdev_ops; err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags); + nlh->nlmsg_flags, extack); if (err) goto out; else @@ -3651,7 +3698,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (dev->netdev_ops->ndo_fdb_add) err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags); + nlh->nlmsg_flags, + extack); else err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags); @@ -4901,6 +4949,40 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, return size; } +static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, + bool is_dump, struct netlink_ext_ack *extack) +{ + struct if_stats_msg *ifsm; + + if (nlh->nlmsg_len < sizeof(*ifsm)) { + NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); + return -EINVAL; + } + + if (!strict_check) + return 0; + + ifsm = nlmsg_data(nlh); + + /* only requests using strict checks can pass data to influence + * the dump. The legacy exception is filter_mask. + */ + if (ifsm->pad1 || ifsm->pad2 || (is_dump && ifsm->ifindex)) { + NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); + return -EINVAL; + } + if (nlmsg_attrlen(nlh, sizeof(*ifsm))) { + NL_SET_ERR_MSG(extack, "Invalid attributes after stats header"); + return -EINVAL; + } + if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) { + NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask"); + return -EINVAL; + } + + return 0; +} + static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4912,8 +4994,10 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, u32 filter_mask; int err; - if (nlmsg_len(nlh) < sizeof(*ifsm)) - return -EINVAL; + err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), + false, extack); + if (err) + return err; ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) @@ -4965,27 +5049,11 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; - if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) { - NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); - return -EINVAL; - } + err = rtnl_valid_stats_req(cb->nlh, cb->strict_check, true, extack); + if (err) + return err; ifsm = nlmsg_data(cb->nlh); - - /* only requests using strict checks can pass data to influence - * the dump. The legacy exception is filter_mask. - */ - if (cb->strict_check) { - if (ifsm->pad1 || ifsm->pad2 || ifsm->ifindex) { - NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); - return -EINVAL; - } - if (nlmsg_attrlen(cb->nlh, sizeof(*ifsm))) { - NL_SET_ERR_MSG(extack, "Invalid attributes after stats header"); - return -EINVAL; - } - } - filter_mask = ifsm->filter_mask; if (!filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump"); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index d6d5c20d7044..e76ed8df9f13 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -78,11 +78,9 @@ int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, { int i = src->sg.start; struct scatterlist *sge = sk_msg_elem(src, i); + struct scatterlist *sgd = NULL; u32 sge_len, sge_off; - if (sk_msg_full(dst)) - return -ENOSPC; - while (off) { if (sge->length > off) break; @@ -94,16 +92,27 @@ int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, } while (len) { - if (sk_msg_full(dst)) - return -ENOSPC; - sge_len = sge->length - off; - sge_off = sge->offset + off; if (sge_len > len) sge_len = len; + + if (dst->sg.end) + sgd = sk_msg_elem(dst, dst->sg.end - 1); + + if (sgd && + (sg_page(sge) == sg_page(sgd)) && + (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) { + sgd->length += sge_len; + dst->sg.size += sge_len; + } else if (!sk_msg_full(dst)) { + sge_off = sge->offset + off; + sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); + } else { + return -ENOSPC; + } + off = 0; len -= sge_len; - sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); sk_mem_charge(sk, sge_len); sk_msg_iter_var_next(i); if (i == src->sg.end && len) diff --git a/net/core/sock.c b/net/core/sock.c index 6aa2e7e0b4fb..900e8a9435f5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -520,14 +520,11 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); -static int sock_setbindtodevice(struct sock *sk, char __user *optval, - int optlen) +static int sock_setbindtodevice_locked(struct sock *sk, int ifindex) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES struct net *net = sock_net(sk); - char devname[IFNAMSIZ]; - int index; /* Sorry... */ ret = -EPERM; @@ -535,6 +532,32 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, goto out; ret = -EINVAL; + if (ifindex < 0) + goto out; + + sk->sk_bound_dev_if = ifindex; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + sk_dst_reset(sk); + + ret = 0; + +out: +#endif + + return ret; +} + +static int sock_setbindtodevice(struct sock *sk, char __user *optval, + int optlen) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + struct net *net = sock_net(sk); + char devname[IFNAMSIZ]; + int index; + + ret = -EINVAL; if (optlen < 0) goto out; @@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, } lock_sock(sk); - sk->sk_bound_dev_if = index; - if (sk->sk_prot->rehash) - sk->sk_prot->rehash(sk); - sk_dst_reset(sk); + ret = sock_setbindtodevice_locked(sk, index); release_sock(sk); - ret = 0; - out: #endif @@ -1055,6 +1073,10 @@ set_rcvbuf: } break; + case SO_BINDTOIFINDEX: + ret = sock_setbindtodevice_locked(sk, val); + break; + default: ret = -ENOPROTOOPT; break; @@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, SOF_TXTIME_REPORT_ERRORS : 0; break; + case SO_BINDTOIFINDEX: + v.val = sk->sk_bound_dev_if; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -1726,7 +1752,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); - atomic64_set(&newsk->sk_cookie, 0); if (likely(newsk->sk_net_refcnt)) sock_inuse_add(sock_net(newsk), 1); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index f78fe58eafc8..6cd3737593a6 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -282,7 +282,7 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) goto err_inval; - fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); + fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); err = -ENOBUFS; if (fi == NULL) goto failure; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 026a05774bf7..1f4972dab9f2 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -103,7 +103,8 @@ static inline void dsa_legacy_unregister(void) { } int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags); + u16 flags, + struct netlink_ext_ack *extack); int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); diff --git a/net/dsa/master.c b/net/dsa/master.c index 71bb15f491c8..79e97d2f2d9b 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -126,6 +126,17 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, } } +static int dsa_master_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + + if (snprintf(name, len, "p%d", cpu_dp->index) >= len) + return -EINVAL; + + return 0; +} + static int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -158,6 +169,38 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) cpu_dp->orig_ethtool_ops = NULL; } +static int dsa_master_ndo_setup(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch *ds = cpu_dp->ds; + struct net_device_ops *ops; + + if (dev->netdev_ops->ndo_get_phys_port_name) + return 0; + + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + cpu_dp->orig_ndo_ops = dev->netdev_ops; + if (cpu_dp->orig_ndo_ops) + memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops)); + + ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name; + + dev->netdev_ops = ops; + + return 0; +} + +static void dsa_master_ndo_teardown(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + + dev->netdev_ops = cpu_dp->orig_ndo_ops; + cpu_dp->orig_ndo_ops = NULL; +} + static ssize_t tagging_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -223,16 +266,27 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) if (ret) return ret; + ret = dsa_master_ndo_setup(dev); + if (ret) + goto out_err_ethtool_teardown; + ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); if (ret) - dsa_master_ethtool_teardown(dev); + goto out_err_ndo_teardown; + + return ret; +out_err_ndo_teardown: + dsa_master_ndo_teardown(dev); +out_err_ethtool_teardown: + dsa_master_ethtool_teardown(dev); return ret; } void dsa_master_teardown(struct net_device *dev) { sysfs_remove_group(&dev->dev.kobj, &dsa_group); + dsa_master_ndo_teardown(dev); dsa_master_ethtool_teardown(dev); dsa_master_reset_mtu(dev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a3fcc1d01615..91de3a663226 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1009,7 +1009,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -1450,7 +1451,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) } fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, - &fdb_info->info); + &fdb_info->info, NULL); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e258a00b4a3d..cd027639df2f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2063,13 +2063,49 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; +static int inet_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv4_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv4_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; - struct netconfmsg *ncm; struct sk_buff *skb; struct ipv4_devconf *devconf; struct in_device *in_dev; @@ -2077,9 +2113,8 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv4_policy, extack); - if (err < 0) + err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack); + if (err) goto errout; err = -EINVAL; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ddbf8c9a1abb..fb99002c3d4e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2467,6 +2467,61 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); } +static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || + rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || + rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_SRC: + case RTA_DST: + case RTA_TABLE: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request"); + return -EINVAL; + } + } + + return 0; +} + static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -2475,18 +2530,14 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb = NULL; struct mfc_cache *cache; struct mr_table *mrt; - struct rtmsg *rtm; __be32 src, grp; u32 tableid; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; - rtm = nlmsg_data(nlh); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce92f73cf104..99be68b15da0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2763,6 +2763,75 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, return skb; } +static int inet_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG(extack, + "ipv4: Invalid header for route get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || + rtm->rtm_table || rtm->rtm_protocol || + rtm->rtm_scope || rtm->rtm_type) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); + return -EINVAL; + } + + if (rtm->rtm_flags & ~(RTM_F_NOTIFY | + RTM_F_LOOKUP_TABLE | + RTM_F_FIB_MATCH)) { + NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_IIF: + case RTA_OIF: + case RTA_SRC: + case RTA_DST: + case RTA_IP_PROTO: + case RTA_SPORT: + case RTA_DPORT: + case RTA_MARK: + case RTA_UID: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -2783,8 +2852,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err; int mark; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, - extack); + err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2079145a3b7c..5f099c9d04e5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2572,14 +2572,16 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); tp->srtt_us = 0; + tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); tp->rcv_rtt_last_tsecr = 0; tp->write_seq += tp->max_window + 2; if (tp->write_seq == 0) tp->write_seq = 1; icsk->icsk_backoff = 0; - tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tp->delivered_ce = 0; @@ -2603,6 +2605,23 @@ int tcp_disconnect(struct sock *sk, int flags) tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; tp->reord_seen = 0; + tp->retrans_out = 0; + tp->sacked_out = 0; + tp->tlp_high_seq = 0; + tp->last_oow_ack_time = 0; + /* There's a bubble in the pipe until at least the first ACK. */ + tp->app_limited = ~0U; + tp->rack.mstamp = 0; + tp->rack.advanced = 0; + tp->rack.reo_wnd_steps = 1; + tp->rack.last_delivered = 0; + tp->rack.reo_wnd_persist = 0; + tp->rack.dsack_seen = 0; + tp->syn_data_acked = 0; + tp->rx_opt.saw_tstamp = 0; + tp->rx_opt.dsack = 0; + tp->rx_opt.num_sacks = 0; + /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 12affb7864d9..182595e2d40f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -479,43 +479,16 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_init_wl(newtp, treq->rcv_isn); - newtp->srtt_us = 0; - newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U); - newicsk->icsk_rto = TCP_TIMEOUT_INIT; newicsk->icsk_ack.lrcvtime = tcp_jiffies32; - newtp->packets_out = 0; - newtp->retrans_out = 0; - newtp->sacked_out = 0; - newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - newtp->tlp_high_seq = 0; newtp->lsndtime = tcp_jiffies32; newsk->sk_txhash = treq->txhash; - newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; - /* So many TCP implementations out there (incorrectly) count the - * initial SYN frame in their delayed-ACK and congestion control - * algorithms that we must have the following bandaid to talk - * efficiently to them. -DaveM - */ - newtp->snd_cwnd = TCP_INIT_CWND; - newtp->snd_cwnd_cnt = 0; - - /* There's a bubble in the pipe until at least the first ACK. */ - newtp->app_limited = ~0U; - tcp_init_xmit_timers(newsk); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; - newtp->rx_opt.saw_tstamp = 0; - - newtp->rx_opt.dsack = 0; - newtp->rx_opt.num_sacks = 0; - - newtp->urg_data = 0; - if (sock_flag(newsk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(newsk, keepalive_time_when(newtp)); @@ -556,13 +529,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, tcp_ecn_openreq_child(newtp, req); newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; - newtp->syn_data_acked = 0; - newtp->rack.mstamp = 0; - newtp->rack.advanced = 0; - newtp->rack.reo_wnd_steps = 1; - newtp->rack.last_delivered = 0; - newtp->rack.reo_wnd_persist = 0; - newtp->rack.dsack_seen = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 730bc44dbad9..6527f61f59ff 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -980,7 +980,6 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - skb->skb_mstamp_ns = tp->tcp_wstamp_ns; if (sk->sk_pacing_status != SK_PACING_NONE) { unsigned long rate = sk->sk_pacing_rate; @@ -1028,7 +1027,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, BUG_ON(!skb || !tcp_skb_pcount(skb)); tp = tcp_sk(sk); - + prior_wstamp = tp->tcp_wstamp_ns; + tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); + skb->skb_mstamp_ns = tp->tcp_wstamp_ns; if (clone_it) { TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; @@ -1045,11 +1046,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, return -ENOBUFS; } - prior_wstamp = tp->tcp_wstamp_ns; - tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); - - skb->skb_mstamp_ns = tp->tcp_wstamp_ns; - inet = inet_sk(sk); tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); @@ -2937,12 +2933,16 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } + /* To avoid taking spuriously low RTT samples based on a timestamp + * for a transmit that never happened, always mark EVER_RETRANS + */ + TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG)) tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, TCP_SKB_CB(skb)->seq, segs, err); if (likely(!err)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; trace_tcp_retransmit_skb(sk, skb); } else if (err != -EBUSY) { NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); @@ -2963,13 +2963,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) #endif TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); - - /* Save stamp of the first retransmit. */ - if (!tp->retrans_stamp) - tp->retrans_stamp = tcp_skb_timestamp(skb); - } + /* Save stamp of the first (attempted) retransmit. */ + if (!tp->retrans_stamp) + tp->retrans_stamp = tcp_skb_timestamp(skb); + if (tp->undo_retrans < 0) tp->undo_retrans = 0; tp->undo_retrans += tcp_skb_pcount(skb); @@ -3750,7 +3749,7 @@ void tcp_send_probe0(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); - unsigned long probe_max; + unsigned long timeout; int err; err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); @@ -3762,26 +3761,18 @@ void tcp_send_probe0(struct sock *sk) return; } + icsk->icsk_probes_out++; if (err <= 0) { if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) icsk->icsk_backoff++; - icsk->icsk_probes_out++; - probe_max = TCP_RTO_MAX; + timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { /* If packet was not sent due to local congestion, - * do not backoff and do not remember icsk_probes_out. - * Let local senders to fight for local resources. - * - * Use accumulated backoff yet. + * Let senders fight for local resources conservatively. */ - if (!icsk->icsk_probes_out) - icsk->icsk_probes_out = 1; - probe_max = TCP_RESOURCE_PROBE_INTERVAL; - } - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - tcp_probe0_when(sk, probe_max), - TCP_RTO_MAX, - NULL); + timeout = TCP_RESOURCE_PROBE_INTERVAL; + } + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL); } int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 71a29e9c0620..d7399a89469d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,28 +22,14 @@ #include <linux/gfp.h> #include <net/tcp.h> -static u32 tcp_retransmit_stamp(const struct sock *sk) -{ - u32 start_ts = tcp_sk(sk)->retrans_stamp; - - if (unlikely(!start_ts)) { - struct sk_buff *head = tcp_rtx_queue_head(sk); - - if (!head) - return 0; - start_ts = tcp_skb_timestamp(head); - } - return start_ts; -} - static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); u32 elapsed, start_ts; s32 remaining; - start_ts = tcp_retransmit_stamp(sk); - if (!icsk->icsk_user_timeout || !start_ts) + start_ts = tcp_sk(sk)->retrans_stamp; + if (!icsk->icsk_user_timeout) return icsk->icsk_rto; elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; remaining = icsk->icsk_user_timeout - elapsed; @@ -173,7 +159,20 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } - +static unsigned int tcp_model_timeout(struct sock *sk, + unsigned int boundary, + unsigned int rto_base) +{ + unsigned int linear_backoff_thresh, timeout; + + linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base); + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * rto_base; + else + timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + return jiffies_to_msecs(timeout); +} /** * retransmits_timed_out() - returns true if this connection has timed out * @sk: The current socket @@ -191,26 +190,15 @@ static bool retransmits_timed_out(struct sock *sk, unsigned int boundary, unsigned int timeout) { - const unsigned int rto_base = TCP_RTO_MIN; - unsigned int linear_backoff_thresh, start_ts; + unsigned int start_ts; if (!inet_csk(sk)->icsk_retransmits) return false; - start_ts = tcp_retransmit_stamp(sk); - if (!start_ts) - return false; - - if (likely(timeout == 0)) { - linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); + start_ts = tcp_sk(sk)->retrans_stamp; + if (likely(timeout == 0)) + timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN); - if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * rto_base; - else - timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; - timeout = jiffies_to_msecs(timeout); - } return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; } @@ -345,7 +333,6 @@ static void tcp_probe_timer(struct sock *sk) struct sk_buff *skb = tcp_send_head(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; - u32 start_ts; if (tp->packets_out || !skb) { icsk->icsk_probes_out = 0; @@ -360,12 +347,13 @@ static void tcp_probe_timer(struct sock *sk) * corresponding system limit. We also implement similar policy when * we use RTO to probe window in tcp_retransmit_timer(). */ - start_ts = tcp_skb_timestamp(skb); - if (!start_ts) - skb->skb_mstamp_ns = tp->tcp_clock_cache; - else if (icsk->icsk_user_timeout && - (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout) - goto abort; + if (icsk->icsk_user_timeout) { + u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out, + tcp_probe0_base(sk)); + + if (elapsed >= icsk->icsk_user_timeout) + goto abort; + } max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { @@ -395,6 +383,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); int max_retries = icsk->icsk_syn_retries ? : sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ + struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req; req = tcp_sk(sk)->fastopen_rsk; @@ -412,6 +401,8 @@ static void tcp_fastopen_synack_timer(struct sock *sk) inet_rtx_syn_ack(sk, req); req->num_timeout++; icsk->icsk_retransmits++; + if (!tp->retrans_stamp) + tp->retrans_stamp = tcp_time_stamp(tp); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); } @@ -443,10 +434,8 @@ void tcp_retransmit_timer(struct sock *sk) */ return; } - if (!tp->packets_out) - goto out; - - WARN_ON(tcp_rtx_queue_empty(sk)); + if (!tp->packets_out || WARN_ON_ONCE(tcp_rtx_queue_empty(sk))) + return; tp->tlp_high_seq = 0; @@ -511,14 +500,13 @@ void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk); + icsk->icsk_retransmits++; if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, - * do not backoff. + * Let senders fight for local resources conservatively. */ - if (!icsk->icsk_retransmits) - icsk->icsk_retransmits = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), + TCP_RESOURCE_PROBE_INTERVAL, TCP_RTO_MAX); goto out; } @@ -539,7 +527,6 @@ void tcp_retransmit_timer(struct sock *sk) * the 120 second clamps though! */ icsk->icsk_backoff++; - icsk->icsk_retransmits++; out_reset_timer: /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index be8b5b2157d8..e93cc0379201 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -21,18 +21,9 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->bind_ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(net, cfg->bind_ifindex); - if (!dev) { - err = -ENODEV; - goto error; - } - - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, - dev->name, strlen(dev->name) + 1); - dev_put(dev); - + err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, + (void *)&cfg->bind_ifindex, + sizeof(cfg->bind_ifindex)); if (err < 0) goto error; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 93d5ad2b1a69..57198b3c86da 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -597,6 +597,43 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; +static int inet6_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) @@ -605,14 +642,12 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlattr *tb[NETCONFA_MAX+1]; struct inet6_dev *in6_dev = NULL; struct net_device *dev = NULL; - struct netconfmsg *ncm; struct sk_buff *skb; struct ipv6_devconf *devconf; int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv6_policy, extack); + err = inet6_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; @@ -5179,6 +5214,52 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrmsg *ifm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request"); + return -EINVAL; + } + + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFA_TARGET_NETNSID: + case IFA_ADDRESS: + case IFA_LOCAL: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get address request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -5199,8 +5280,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 0d1ee82ee55b..d43d076c98f5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -523,6 +523,50 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } +static int ip6addrlbl_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrlblmsg *ifal; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + + ifal = nlmsg_data(nlh); + if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFAL_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFAL_ADDRESS: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request"); + return -EINVAL; + } + } + + return 0; +} + static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -535,8 +579,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct ip6addrlbl_entry *p; struct sk_buff *skb; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, - extack); + err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b1be67ca6768..c465d8a102f2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -524,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) return PACKET_REJECT; } -static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, +static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi) { struct erspan_base_hdr *ershdr; @@ -611,7 +611,7 @@ static int gre_rcv(struct sk_buff *skb) if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) return 0; goto out; } diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index ad1a9ccd4b44..25430c991cea 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -32,18 +32,9 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; } if (cfg->bind_ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(net, cfg->bind_ifindex); - if (!dev) { - err = -ENODEV; - goto error; - } - - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, - dev->name, strlen(dev->name) + 1); - dev_put(dev); - + err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, + (void *)&cfg->bind_ifindex, + sizeof(cfg->bind_ifindex)); if (err < 0) goto error; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 964491cf3672..dc066fdf7e46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4812,6 +4812,73 @@ int rt6_dump_route(struct fib6_info *rt, void *p_arg) arg->cb->nlh->nlmsg_seq, flags); } +static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for get route request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || + rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || + rtm->rtm_type) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid flags for get route request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_SRC: + case RTA_DST: + case RTA_IIF: + case RTA_OIF: + case RTA_MARK: + case RTA_UID: + case RTA_SPORT: + case RTA_DPORT: + case RTA_IP_PROTO: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4826,8 +4893,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct flowi6 fl6 = {}; bool fibmatch; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - extack); + err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7d55d4c04088..2662a23c658e 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1209,21 +1209,57 @@ static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, }; +static int mpls_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_mpls_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_mpls_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int mpls_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX + 1]; - struct netconfmsg *ncm; struct net_device *dev; struct mpls_dev *mdev; struct sk_buff *skb; int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_mpls_policy, extack); + err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; @@ -2236,6 +2272,64 @@ errout: rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); } +static int mpls_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for get route request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) || + rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table || + rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid flags for get route request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); + if (err) + return err; + + if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) { + NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_DST: + case RTA_NEWDST: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); + return -EINVAL; + } + } + + return 0; +} + static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct netlink_ext_ack *extack) { @@ -2255,8 +2349,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, u8 n_labels; int err; - err = nlmsg_parse(in_nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack); if (err < 0) goto errout; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3c023d6120f6..8fa35df94c07 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1371,6 +1371,14 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) } EXPORT_SYMBOL_GPL(netlink_has_listeners); +bool netlink_strict_get_check(struct sk_buff *skb) +{ + const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk); + + return nlk->flags & NETLINK_F_STRICT_CHK; +} +EXPORT_SYMBOL_GPL(netlink_strict_get_check); + static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index c038e021a591..43849d752a1e 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -206,8 +206,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) return ERR_PTR(-EINVAL); /* Allocate and set up the meter before locking anything. */ - meter = kzalloc(n_bands * sizeof(struct dp_meter_band) + - sizeof(*meter), GFP_KERNEL); + meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL); if (!meter) return ERR_PTR(-ENOMEM); diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 6a5dce8baf19..4a57fec6f306 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -18,6 +18,7 @@ #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/idr.h> +#include <linux/percpu.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> @@ -35,6 +36,7 @@ struct basic_filter { struct tcf_result res; struct tcf_proto *tp; struct list_head link; + struct tc_basic_pcnt __percpu *pf; struct rcu_work rwork; }; @@ -46,8 +48,10 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct basic_filter *f; list_for_each_entry_rcu(f, &head->flist, link) { + __this_cpu_inc(f->pf->rcnt); if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; + __this_cpu_inc(f->pf->rhit); *res = f->res; r = tcf_exts_exec(skb, &f->exts, res); if (r < 0) @@ -89,6 +93,7 @@ static void __basic_delete_filter(struct basic_filter *f) tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); tcf_exts_put_net(&f->exts); + free_percpu(f->pf); kfree(f); } @@ -208,6 +213,11 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout; fnew->handle = handle; + fnew->pf = alloc_percpu(struct tc_basic_pcnt); + if (!fnew->pf) { + err = -ENOMEM; + goto errout; + } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, extack); @@ -231,6 +241,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + free_percpu(fnew->pf); tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; @@ -265,8 +276,10 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl) static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { + struct tc_basic_pcnt gpf = {}; struct basic_filter *f = fh; struct nlattr *nest; + int cpu; if (f == NULL) return skb->len; @@ -281,6 +294,18 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid)) goto nla_put_failure; + for_each_possible_cpu(cpu) { + struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu); + + gpf.rcnt += pf->rcnt; + gpf.rhit += pf->rhit; + } + + if (nla_put_64bit(skb, TCA_BASIC_PCNT, + sizeof(struct tc_basic_pcnt), + &gpf, TCA_BASIC_PAD)) + goto nla_put_failure; + if (tcf_exts_dump(skb, &f->exts) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 0e408ee9dcec..a1b803fd372e 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> +#include <linux/percpu.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> @@ -22,6 +23,7 @@ struct cls_mall_head { u32 handle; u32 flags; unsigned int in_hw_count; + struct tc_matchall_pcnt __percpu *pf; struct rcu_work rwork; }; @@ -34,6 +36,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; *res = head->res; + __this_cpu_inc(head->pf->rhit); return tcf_exts_exec(skb, &head->exts, res); } @@ -46,6 +49,7 @@ static void __mall_destroy(struct cls_mall_head *head) { tcf_exts_destroy(&head->exts); tcf_exts_put_net(&head->exts); + free_percpu(head->pf); kfree(head); } @@ -192,6 +196,11 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, handle = 1; new->handle = handle; new->flags = flags; + new->pf = alloc_percpu(struct tc_matchall_pcnt); + if (!new->pf) { + err = -ENOMEM; + goto err_alloc_percpu; + } err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr, extack); @@ -214,6 +223,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, err_replace_hw_filter: err_set_parms: + free_percpu(new->pf); +err_alloc_percpu: tcf_exts_destroy(&new->exts); err_exts_init: kfree(new); @@ -270,8 +281,10 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { + struct tc_matchall_pcnt gpf = {}; struct cls_mall_head *head = fh; struct nlattr *nest; + int cpu; if (!head) return skb->len; @@ -289,6 +302,17 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags)) goto nla_put_failure; + for_each_possible_cpu(cpu) { + struct tc_matchall_pcnt *pf = per_cpu_ptr(head->pf, cpu); + + gpf.rhit += pf->rhit; + } + + if (nla_put_64bit(skb, TCA_MATCHALL_PCNT, + sizeof(struct tc_matchall_pcnt), + &gpf, TCA_MATCHALL_PAD)) + goto nla_put_failure; + if (tcf_exts_dump(skb, &head->exts)) goto nla_put_failure; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7e4d1ccf4c87..03e26e8d0ec9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -758,8 +758,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return 0; } -void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, - unsigned int len) +void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 5df9d1138ac9..cd78253de31d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -556,10 +556,11 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); * Call all network notifier blocks. */ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, - struct switchdev_notifier_info *info) + struct switchdev_notifier_info *info, + struct netlink_ext_ack *extack) { info->dev = dev; - info->extack = NULL; + info->extack = extack; return atomic_notifier_call_chain(&switchdev_notif_chain, val, info); } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); diff --git a/net/tipc/trace.c b/net/tipc/trace.c index 964823841efe..265f6a26aa3d 100644 --- a/net/tipc/trace.c +++ b/net/tipc/trace.c @@ -111,7 +111,7 @@ int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf) break; default: break; - }; + } i += scnprintf(buf + i, sz - i, " | %u", msg_src_droppable(hdr)); i += scnprintf(buf + i, sz - i, " %u", @@ -122,7 +122,7 @@ int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf) default: /* need more? */ break; - }; + } i += scnprintf(buf + i, sz - i, "\n"); if (!more) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 11cdc8f7db63..86b9527c4826 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -124,6 +124,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) { struct aead_request *aead_req = (struct aead_request *)req; struct scatterlist *sgout = aead_req->dst; + struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_context *tls_ctx; struct scatterlist *sg; @@ -134,12 +135,16 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) skb = (struct sk_buff *)req->data; tls_ctx = tls_get_ctx(skb->sk); ctx = tls_sw_ctx_rx(tls_ctx); - pending = atomic_dec_return(&ctx->decrypt_pending); /* Propagate if there was an err */ if (err) { ctx->async_wait.err = err; tls_err_abort(skb->sk, err); + } else { + struct strp_msg *rxm = strp_msg(skb); + + rxm->offset += tls_ctx->rx.prepend_size; + rxm->full_len -= tls_ctx->rx.overhead_size; } /* After using skb->sk to propagate sk through crypto async callback @@ -147,18 +152,21 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) */ skb->sk = NULL; - /* Release the skb, pages and memory allocated for crypto req */ - kfree_skb(skb); - /* Skip the first S/G entry as it points to AAD */ - for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { - if (!sg) - break; - put_page(sg_page(sg)); + /* Free the destination pages if skb was not decrypted inplace */ + if (sgout != sgin) { + /* Skip the first S/G entry as it points to AAD */ + for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { + if (!sg) + break; + put_page(sg_page(sg)); + } } kfree(aead_req); + pending = atomic_dec_return(&ctx->decrypt_pending); + if (!pending && READ_ONCE(ctx->async_notify)) complete(&ctx->async_wait.completion); } @@ -1020,8 +1028,8 @@ send_end: return copied ? copied : ret; } -int tls_sw_do_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +static int tls_sw_do_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) { long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -1143,16 +1151,6 @@ sendpage_end: return copied ? copied : ret; } -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) - return -ENOTSUPP; - - return tls_sw_do_sendpage(sk, page, offset, size, flags); -} - int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1281,7 +1279,7 @@ out: static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct iov_iter *out_iov, struct scatterlist *out_sg, - int *chunk, bool *zc) + int *chunk, bool *zc, bool async) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1381,13 +1379,13 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, fallback_to_reg_recv: sgout = sgin; pages = 0; - *chunk = 0; + *chunk = data_len; *zc = false; } /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, skb, sgin, sgout, iv, - data_len, aead_req, *zc); + data_len, aead_req, async); if (err == -EINPROGRESS) return err; @@ -1400,7 +1398,8 @@ fallback_to_reg_recv: } static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, - struct iov_iter *dest, int *chunk, bool *zc) + struct iov_iter *dest, int *chunk, bool *zc, + bool async) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1413,7 +1412,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; #endif if (!ctx->decrypted) { - err = decrypt_internal(sk, skb, dest, NULL, chunk, zc); + err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async); if (err < 0) { if (err == -EINPROGRESS) tls_advance_record_sn(sk, &tls_ctx->rx); @@ -1439,7 +1438,7 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb, bool zc = true; int chunk; - return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc); + return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false); } static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, @@ -1466,6 +1465,77 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, return true; } +/* This function traverses the rx_list in tls receive context to copies the + * decrypted data records into the buffer provided by caller zero copy is not + * true. Further, the records are removed from the rx_list if it is not a peek + * case and the record has been consumed completely. + */ +static int process_rx_list(struct tls_sw_context_rx *ctx, + struct msghdr *msg, + size_t skip, + size_t len, + bool zc, + bool is_peek) +{ + struct sk_buff *skb = skb_peek(&ctx->rx_list); + ssize_t copied = 0; + + while (skip && skb) { + struct strp_msg *rxm = strp_msg(skb); + + if (skip < rxm->full_len) + break; + + skip = skip - rxm->full_len; + skb = skb_peek_next(skb, &ctx->rx_list); + } + + while (len && skb) { + struct sk_buff *next_skb; + struct strp_msg *rxm = strp_msg(skb); + int chunk = min_t(unsigned int, rxm->full_len - skip, len); + + if (!zc || (rxm->full_len - skip) > len) { + int err = skb_copy_datagram_msg(skb, rxm->offset + skip, + msg, chunk); + if (err < 0) + return err; + } + + len = len - chunk; + copied = copied + chunk; + + /* Consume the data from record if it is non-peek case*/ + if (!is_peek) { + rxm->offset = rxm->offset + chunk; + rxm->full_len = rxm->full_len - chunk; + + /* Return if there is unconsumed data in the record */ + if (rxm->full_len - skip) + break; + } + + /* The remaining skip-bytes must lie in 1st record in rx_list. + * So from the 2nd record, 'skip' should be 0. + */ + skip = 0; + + if (msg) + msg->msg_flags |= MSG_EOR; + + next_skb = skb_peek_next(skb, &ctx->rx_list); + + if (!is_peek) { + skb_unlink(skb, &ctx->rx_list); + kfree_skb(skb); + } + + skb = next_skb; + } + + return copied; +} + int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, @@ -1476,7 +1546,8 @@ int tls_sw_recvmsg(struct sock *sk, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct sk_psock *psock; - unsigned char control; + unsigned char control = 0; + ssize_t decrypted = 0; struct strp_msg *rxm; struct sk_buff *skb; ssize_t copied = 0; @@ -1484,6 +1555,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err = 0; long timeo; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); + bool is_peek = flags & MSG_PEEK; int num_async = 0; flags |= nonblock; @@ -1494,11 +1566,28 @@ int tls_sw_recvmsg(struct sock *sk, psock = sk_psock_get(sk); lock_sock(sk); - target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + /* Process pending decrypted records. It must be non-zero-copy */ + err = process_rx_list(ctx, msg, 0, len, false, is_peek); + if (err < 0) { + tls_err_abort(sk, err); + goto end; + } else { + copied = err; + } + + len = len - copied; + if (len) { + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + } else { + goto recv_end; + } + do { - bool zc = false; + bool retain_skb = false; bool async = false; + bool zc = false; + int to_decrypt; int chunk = 0; skb = tls_wait_data(sk, psock, flags, timeo, &err); @@ -1508,7 +1597,7 @@ int tls_sw_recvmsg(struct sock *sk, msg, len, flags); if (ret > 0) { - copied += ret; + decrypted += ret; len -= ret; continue; } @@ -1535,70 +1624,70 @@ int tls_sw_recvmsg(struct sock *sk, goto recv_end; } - if (!ctx->decrypted) { - int to_copy = rxm->full_len - tls_ctx->rx.overhead_size; + to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size; - if (!is_kvec && to_copy <= len && - likely(!(flags & MSG_PEEK))) - zc = true; + if (to_decrypt <= len && !is_kvec && !is_peek) + zc = true; - err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc); - if (err < 0 && err != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); - goto recv_end; - } - - if (err == -EINPROGRESS) { - async = true; - num_async++; - goto pick_next_record; - } - - ctx->decrypted = true; + err = decrypt_skb_update(sk, skb, &msg->msg_iter, + &chunk, &zc, ctx->async_capable); + if (err < 0 && err != -EINPROGRESS) { + tls_err_abort(sk, EBADMSG); + goto recv_end; } - if (!zc) { - chunk = min_t(unsigned int, rxm->full_len, len); + if (err == -EINPROGRESS) { + async = true; + num_async++; + goto pick_next_record; + } else { + if (!zc) { + if (rxm->full_len > len) { + retain_skb = true; + chunk = len; + } else { + chunk = rxm->full_len; + } - err = skb_copy_datagram_msg(skb, rxm->offset, msg, - chunk); - if (err < 0) - goto recv_end; + err = skb_copy_datagram_msg(skb, rxm->offset, + msg, chunk); + if (err < 0) + goto recv_end; + + if (!is_peek) { + rxm->offset = rxm->offset + chunk; + rxm->full_len = rxm->full_len - chunk; + } + } } pick_next_record: - copied += chunk; + if (chunk > len) + chunk = len; + + decrypted += chunk; len -= chunk; - if (likely(!(flags & MSG_PEEK))) { - u8 control = ctx->control; - - /* For async, drop current skb reference */ - if (async) - skb = NULL; - - if (tls_sw_advance_skb(sk, skb, chunk)) { - /* Return full control message to - * userspace before trying to parse - * another message type - */ - msg->msg_flags |= MSG_EOR; - if (control != TLS_RECORD_TYPE_DATA) - goto recv_end; - } else { - break; - } - } else { - /* MSG_PEEK right now cannot look beyond current skb - * from strparser, meaning we cannot advance skb here - * and thus unpause strparser since we'd loose original - * one. + + /* For async or peek case, queue the current skb */ + if (async || is_peek || retain_skb) { + skb_queue_tail(&ctx->rx_list, skb); + skb = NULL; + } + + if (tls_sw_advance_skb(sk, skb, chunk)) { + /* Return full control message to + * userspace before trying to parse + * another message type */ + msg->msg_flags |= MSG_EOR; + if (ctx->control != TLS_RECORD_TYPE_DATA) + goto recv_end; + } else { break; } /* If we have a new message from strparser, continue now. */ - if (copied >= target && !ctx->recv_pkt) + if (decrypted >= target && !ctx->recv_pkt) break; } while (len); @@ -1612,13 +1701,33 @@ recv_end: /* one of async decrypt failed */ tls_err_abort(sk, err); copied = 0; + decrypted = 0; + goto end; } } else { reinit_completion(&ctx->async_wait.completion); } WRITE_ONCE(ctx->async_notify, false); + + /* Drain records from the rx_list & copy if required */ + if (is_peek || is_kvec) + err = process_rx_list(ctx, msg, copied, + decrypted, false, is_peek); + else + err = process_rx_list(ctx, msg, 0, + decrypted, true, is_peek); + if (err < 0) { + tls_err_abort(sk, err); + copied = 0; + goto end; + } + + WARN_ON(decrypted != err); } + copied += decrypted; + +end: release_sock(sk); if (psock) sk_psock_put(sk, psock); @@ -1655,7 +1764,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, } if (!ctx->decrypted) { - err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc); + err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); if (err < 0) { tls_err_abort(sk, EBADMSG); @@ -1842,6 +1951,7 @@ void tls_sw_release_resources_rx(struct sock *sk) if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; + skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); strp_stop(&ctx->strp); write_lock_bh(&sk->sk_callback_lock); @@ -1891,6 +2001,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct crypto_aead **aead; struct strp_callbacks cb; u16 nonce_size, tag_size, iv_size, rec_seq_size; + struct crypto_tfm *tfm; char *iv, *rec_seq; int rc = 0; @@ -1937,6 +2048,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) crypto_init_wait(&sw_ctx_rx->async_wait); crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; + skb_queue_head_init(&sw_ctx_rx->rx_list); aead = &sw_ctx_rx->aead_recv; } @@ -2004,6 +2116,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) goto free_aead; if (sw_ctx_rx) { + tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); + sw_ctx_rx->async_capable = + tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + /* Set up strparser */ memset(&cb, 0, sizeof(cb)); cb.rcv_msg = tls_queue; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 43a1dec08825..a60df252d3cc 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -505,7 +505,7 @@ out: static int __vsock_bind_stream(struct vsock_sock *vsk, struct sockaddr_vm *addr) { - static u32 port = 0; + static u32 port; struct sockaddr_vm new_addr; if (!port) |