summaryrefslogtreecommitdiff
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c456
1 files changed, 270 insertions, 186 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 322bd62e688b..a96d5b385d8f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -128,7 +128,6 @@ static void rt6_uncached_list_add(struct rt6_info *rt)
{
struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
- rt->dst.flags |= DST_NOCACHE;
rt->rt6i_uncached_list = ul;
spin_lock_bh(&ul->lock);
@@ -354,7 +353,7 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
- 0, DST_OBSOLETE_FORCE_CHK, flags);
+ 1, DST_OBSOLETE_FORCE_CHK, flags);
if (rt)
rt6_info_init(rt);
@@ -381,7 +380,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net,
*p = NULL;
}
} else {
- dst_destroy((struct dst_entry *)rt);
+ dst_release_immediate(&rt->dst);
return NULL;
}
}
@@ -418,14 +417,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;
- if (dev != loopback_dev) {
- if (idev && idev->dev == dev) {
- struct inet6_dev *loopback_idev =
- in6_dev_get(loopback_dev);
- if (loopback_idev) {
- rt->rt6i_idev = loopback_idev;
- in6_dev_put(idev);
- }
+ if (idev && idev->dev != loopback_dev) {
+ struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
+ if (loopback_idev) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
}
}
}
@@ -444,21 +440,12 @@ static bool rt6_check_expired(const struct rt6_info *rt)
if (time_after(jiffies, rt->dst.expires))
return true;
} else if (rt->dst.from) {
- return rt6_check_expired((struct rt6_info *) rt->dst.from);
+ return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
+ rt6_check_expired((struct rt6_info *)rt->dst.from);
}
return false;
}
-/* Multipath route selection:
- * Hash based function using packet header and flowlabel.
- * Adapted from fib_info_hashfn()
- */
-static int rt6_info_hash_nhsfn(unsigned int candidate_count,
- const struct flowi6 *fl6)
-{
- return get_hash_from_flowi6(fl6) % candidate_count;
-}
-
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
struct flowi6 *fl6, int oif,
int strict)
@@ -466,7 +453,13 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
struct rt6_info *sibling, *next_sibling;
int route_choosen;
- route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+ /* We might have already computed the hash for ICMPv6 errors. In such
+ * case it will always be non-zero. Otherwise now is the time to do it.
+ */
+ if (!fl6->mp_hash)
+ fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+
+ route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
/* Don't change the route, if route_choosen == 0
* (siblings does not include ourself)
*/
@@ -932,20 +925,21 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
EXPORT_SYMBOL(rt6_lookup);
/* ip6_ins_rt is called with FREE table->tb6_lock.
- It takes new route entry, the addition fails by any reason the
- route is freed. In any case, if caller does not hold it, it may
- be destroyed.
+ * It takes new route entry, the addition fails by any reason the
+ * route is released.
+ * Caller must hold dst before calling it.
*/
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
- struct mx6_config *mxc)
+ struct mx6_config *mxc,
+ struct netlink_ext_ack *extack)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info, mxc);
+ err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -956,13 +950,39 @@ int ip6_ins_rt(struct rt6_info *rt)
struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
struct mx6_config mxc = { .mx = NULL, };
- return __ip6_ins_rt(rt, &info, &mxc);
+ /* Hold dst to account for the reference from the fib6 tree */
+ dst_hold(&rt->dst);
+ return __ip6_ins_rt(rt, &info, &mxc, NULL);
+}
+
+/* called with rcu_lock held */
+static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
+{
+ struct net_device *dev = rt->dst.dev;
+
+ if (rt->rt6i_flags & RTF_LOCAL) {
+ /* for copies of local routes, dst->dev needs to be the
+ * device if it is a master device, the master device if
+ * device is enslaved, and the loopback as the default
+ */
+ if (netif_is_l3_slave(dev) &&
+ !rt6_need_strict(&rt->rt6i_dst.addr))
+ dev = l3mdev_master_dev_rcu(dev);
+ else if (!netif_is_l3_master(dev))
+ dev = dev_net(dev)->loopback_dev;
+ /* last case is netif_is_l3_master(dev) is true in which
+ * case we want dev returned to be dev
+ */
+ }
+
+ return dev;
}
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct net_device *dev;
struct rt6_info *rt;
/*
@@ -972,8 +992,10 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
ort = (struct rt6_info *)ort->dst.from;
- rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
-
+ rcu_read_lock();
+ dev = ip6_rt_get_dev_rcu(ort);
+ rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+ rcu_read_unlock();
if (!rt)
return NULL;
@@ -1001,11 +1023,13 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
+ struct net_device *dev;
struct rt6_info *pcpu_rt;
- pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
- rt->dst.dev, rt->dst.flags);
-
+ rcu_read_lock();
+ dev = ip6_rt_get_dev_rcu(rt);
+ pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
+ rcu_read_unlock();
if (!pcpu_rt)
return NULL;
ip6_rt_copy_init(pcpu_rt, rt);
@@ -1048,7 +1072,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
prev = cmpxchg(p, NULL, pcpu_rt);
if (prev) {
/* If someone did it before us, return prev instead */
- dst_destroy(&pcpu_rt->dst);
+ dst_release_immediate(&pcpu_rt->dst);
pcpu_rt = prev;
}
} else {
@@ -1058,7 +1082,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
* since rt is going away anyway. The next
* dst_check() will trigger a re-lookup.
*/
- dst_destroy(&pcpu_rt->dst);
+ dst_release_immediate(&pcpu_rt->dst);
pcpu_rt = rt;
}
dst_hold(&pcpu_rt->dst);
@@ -1128,12 +1152,15 @@ redo_rt6_select:
uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
dst_release(&rt->dst);
- if (uncached_rt)
+ if (uncached_rt) {
+ /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
+ * No need for another dst_hold()
+ */
rt6_uncached_list_add(uncached_rt);
- else
+ } else {
uncached_rt = net->ipv6.ip6_null_entry;
-
- dst_hold(&uncached_rt->dst);
+ dst_hold(&uncached_rt->dst);
+ }
trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
return uncached_rt;
@@ -1184,6 +1211,54 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
+static void ip6_multipath_l3_keys(const struct sk_buff *skb,
+ struct flow_keys *keys)
+{
+ const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+ const struct ipv6hdr *key_iph = outer_iph;
+ const struct ipv6hdr *inner_iph;
+ const struct icmp6hdr *icmph;
+ struct ipv6hdr _inner_iph;
+
+ if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
+ goto out;
+
+ icmph = icmp6_hdr(skb);
+ if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
+ icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
+ icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
+ icmph->icmp6_type != ICMPV6_PARAMPROB)
+ goto out;
+
+ inner_iph = skb_header_pointer(skb,
+ skb_transport_offset(skb) + sizeof(*icmph),
+ sizeof(_inner_iph), &_inner_iph);
+ if (!inner_iph)
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ memset(keys, 0, sizeof(*keys));
+ keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+}
+
+/* if skb is set it will be used and fl6 can be NULL */
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+{
+ struct flow_keys hash_keys;
+
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys);
+ return flow_hash_from_keys(&hash_keys);
+ }
+
+ return get_hash_from_flowi6(fl6);
+}
+
void ip6_route_input(struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -1202,6 +1277,8 @@ void ip6_route_input(struct sk_buff *skb)
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+ if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
@@ -1244,9 +1321,11 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
+ struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
- rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
+ rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
+ DST_OBSOLETE_DEAD, 0);
if (rt) {
rt6_info_init(rt);
@@ -1256,10 +1335,8 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
new->output = dst_discard_out;
dst_copy_metrics(new, &ort->dst);
- rt->rt6i_idev = ort->rt6i_idev;
- if (rt->rt6i_idev)
- in6_dev_hold(rt->rt6i_idev);
+ rt->rt6i_idev = in6_dev_get(loopback_dev);
rt->rt6i_gateway = ort->rt6i_gateway;
rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
rt->rt6i_metric = 0;
@@ -1268,8 +1345,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
#ifdef CONFIG_IPV6_SUBTREES
memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
-
- dst_free(new);
}
dst_release(dst_orig);
@@ -1289,7 +1364,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt)
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
- if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ u32 rt_cookie = 0;
+
+ if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
return NULL;
if (rt6_check_expired(rt))
@@ -1322,7 +1399,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt6_dst_from_metrics_check(rt);
if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
+ (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
return rt6_dst_from_check(rt, cookie);
else
return rt6_check(rt, cookie);
@@ -1355,10 +1432,16 @@ static void ip6_link_failure(struct sk_buff *skb)
rt = (struct rt6_info *) skb_dst(skb);
if (rt) {
if (rt->rt6i_flags & RTF_CACHE) {
- dst_hold(&rt->dst);
- ip6_del_rt(rt);
- } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
- rt->rt6i_node->fn_sernum = -1;
+ if (dst_hold_safe(&rt->dst))
+ ip6_del_rt(rt);
+ } else {
+ struct fib6_node *fn;
+
+ rcu_read_lock();
+ fn = rcu_dereference(rt->rt6i_node);
+ if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+ fn->fn_sernum = -1;
+ rcu_read_unlock();
}
}
}
@@ -1375,7 +1458,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
return !(rt->rt6i_flags & RTF_CACHE) &&
- (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+ (rt->rt6i_flags & RTF_PCPU ||
+ rcu_access_pointer(rt->rt6i_node));
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -1420,6 +1504,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
* invalidate the sk->sk_dst_cache.
*/
ip6_ins_rt(nrt6);
+ /* Release the reference taken in
+ * ip6_rt_cache_alloc()
+ */
+ dst_release(&nrt6->dst);
}
}
}
@@ -1648,9 +1736,6 @@ out:
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
-static struct dst_entry *icmp6_dst_gc_list;
-static DEFINE_SPINLOCK(icmp6_dst_lock);
-
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
struct flowi6 *fl6)
{
@@ -1671,19 +1756,16 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
- atomic_set(&rt->dst.__refcnt, 1);
rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
rt->rt6i_idev = idev;
dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
- spin_lock_bh(&icmp6_dst_lock);
- rt->dst.next = icmp6_dst_gc_list;
- icmp6_dst_gc_list = &rt->dst;
- spin_unlock_bh(&icmp6_dst_lock);
-
- fib6_force_start_gc(net);
+ /* Add this dst into uncached_list so that rt6_ifdown() can
+ * do proper release of the net_device
+ */
+ rt6_uncached_list_add(rt);
dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
@@ -1691,48 +1773,6 @@ out:
return dst;
}
-int icmp6_dst_gc(void)
-{
- struct dst_entry *dst, **pprev;
- int more = 0;
-
- spin_lock_bh(&icmp6_dst_lock);
- pprev = &icmp6_dst_gc_list;
-
- while ((dst = *pprev) != NULL) {
- if (!atomic_read(&dst->__refcnt)) {
- *pprev = dst->next;
- dst_free(dst);
- } else {
- pprev = &dst->next;
- ++more;
- }
- }
-
- spin_unlock_bh(&icmp6_dst_lock);
-
- return more;
-}
-
-static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
- void *arg)
-{
- struct dst_entry *dst, **pprev;
-
- spin_lock_bh(&icmp6_dst_lock);
- pprev = &icmp6_dst_gc_list;
- while ((dst = *pprev) != NULL) {
- struct rt6_info *rt = (struct rt6_info *) dst;
- if (func(rt, arg)) {
- *pprev = dst->next;
- dst_free(dst);
- } else {
- pprev = &dst->next;
- }
- }
- spin_unlock_bh(&icmp6_dst_lock);
-}
-
static int ip6_dst_gc(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
@@ -1844,7 +1884,8 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
-static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
+static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct rt6_info *rt = NULL;
@@ -1855,14 +1896,25 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
int err = -EINVAL;
/* RTF_PCPU is an internal flag; can not be set by userspace */
- if (cfg->fc_flags & RTF_PCPU)
+ if (cfg->fc_flags & RTF_PCPU) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
goto out;
+ }
- if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
+ if (cfg->fc_dst_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ goto out;
+ }
+ if (cfg->fc_src_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid source address length");
goto out;
+ }
#ifndef CONFIG_IPV6_SUBTREES
- if (cfg->fc_src_len)
+ if (cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack,
+ "Specifying source address requires IPV6_SUBTREES to be enabled");
goto out;
+ }
#endif
if (cfg->fc_ifindex) {
err = -ENODEV;
@@ -1926,7 +1978,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
err = lwtunnel_build_state(cfg->fc_encap_type,
cfg->fc_encap, AF_INET6, cfg,
- &lwtstate);
+ &lwtstate, extack);
if (err)
goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate);
@@ -2013,9 +2065,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
err = -EINVAL;
if (ipv6_chk_addr_and_flags(net, gw_addr,
gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0))
+ dev : NULL, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
goto out;
-
+ }
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -2031,8 +2084,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
addressing
*/
if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED)))
+ IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid gateway address");
goto out;
+ }
if (cfg->fc_table) {
grt = ip6_nh_lookup_table(net, cfg, gw_addr);
@@ -2072,8 +2128,14 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
goto out;
}
err = -EINVAL;
- if (!dev || (dev->flags & IFF_LOOPBACK))
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
+ goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
goto out;
+ }
}
err = -ENODEV;
@@ -2082,6 +2144,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
+ NL_SET_ERR_MSG(extack, "Invalid source address");
err = -EINVAL;
goto out;
}
@@ -2106,18 +2169,19 @@ out:
if (idev)
in6_dev_put(idev);
if (rt)
- dst_free(&rt->dst);
+ dst_release_immediate(&rt->dst);
return ERR_PTR(err);
}
-int ip6_route_add(struct fib6_config *cfg)
+int ip6_route_add(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct mx6_config mxc = { .mx = NULL, };
struct rt6_info *rt;
int err;
- rt = ip6_route_info_create(cfg);
+ rt = ip6_route_info_create(cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
@@ -2128,14 +2192,14 @@ int ip6_route_add(struct fib6_config *cfg)
if (err)
goto out;
- err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
+ err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
kfree(mxc.mx);
return err;
out:
if (rt)
- dst_free(&rt->dst);
+ dst_release_immediate(&rt->dst);
return err;
}
@@ -2146,8 +2210,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
struct fib6_table *table;
struct net *net = dev_net(rt->dst.dev);
- if (rt == net->ipv6.ip6_null_entry ||
- rt->dst.flags & DST_NOCACHE) {
+ if (rt == net->ipv6.ip6_null_entry) {
err = -ENOENT;
goto out;
}
@@ -2222,7 +2285,8 @@ out_put:
return err;
}
-static int ip6_route_del(struct fib6_config *cfg)
+static int ip6_route_del(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct fib6_table *table;
struct fib6_node *fn;
@@ -2230,8 +2294,10 @@ static int ip6_route_del(struct fib6_config *cfg)
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
- if (!table)
+ if (!table) {
+ NL_SET_ERR_MSG(extack, "FIB table does not exist");
return err;
+ }
read_lock_bh(&table->tb6_lock);
@@ -2366,10 +2432,11 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (on_link)
nrt->rt6i_flags &= ~RTF_GATEWAY;
+ nrt->rt6i_protocol = RTPROT_REDIRECT;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
if (ip6_ins_rt(nrt))
- goto out;
+ goto out_release;
netevent.old = &rt->dst;
netevent.new = &nrt->dst;
@@ -2382,6 +2449,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
ip6_del_rt(rt);
}
+out_release:
+ /* Release the reference taken in
+ * ip6_rt_cache_alloc()
+ */
+ dst_release(&nrt->dst);
+
out:
neigh_release(neigh);
}
@@ -2470,6 +2543,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_dst_len = prefixlen,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
RTF_UP | RTF_PREF(pref),
+ .fc_protocol = RTPROT_RA,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
@@ -2483,7 +2557,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
if (!prefixlen)
cfg.fc_flags |= RTF_DEFAULT;
- ip6_route_add(&cfg);
+ ip6_route_add(&cfg, NULL);
return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
}
@@ -2522,6 +2596,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+ .fc_protocol = RTPROT_RA,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = dev_net(dev),
@@ -2529,7 +2604,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
cfg.fc_gateway = *gwaddr;
- if (!ip6_route_add(&cfg)) {
+ if (!ip6_route_add(&cfg, NULL)) {
struct fib6_table *table;
table = fib6_get_table(dev_net(dev), cfg.fc_table);
@@ -2622,10 +2697,10 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
- err = ip6_route_add(&cfg);
+ err = ip6_route_add(&cfg, NULL);
break;
case SIOCDELRT:
- err = ip6_route_del(&cfg);
+ err = ip6_route_del(&cfg, NULL);
break;
default:
err = -EINVAL;
@@ -2697,15 +2772,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
u32 tb_id;
struct net *net = dev_net(idev->dev);
- struct net_device *dev = net->loopback_dev;
+ struct net_device *dev = idev->dev;
struct rt6_info *rt;
- /* use L3 Master device as loopback for host routes if device
- * is enslaved and address is not link local or multicast
- */
- if (!rt6_need_strict(addr))
- dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
-
rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
@@ -2729,9 +2798,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_dst.plen = 128;
tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
rt->rt6i_table = fib6_get_table(net, tb_id);
- rt->dst.flags |= DST_NOCACHE;
-
- atomic_set(&rt->dst.__refcnt, 1);
return rt;
}
@@ -2819,7 +2885,6 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
};
fib6_clean_all(net, fib6_ifdown, &adn);
- icmp6_clean_all(fib6_ifdown, &adn);
if (dev)
rt6_uncached_list_flush_dev(net, dev);
}
@@ -2904,7 +2969,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct fib6_config *cfg)
+ struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
@@ -2988,7 +3054,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
- cfg->fc_mp_len);
+ cfg->fc_mp_len, extack);
if (err < 0)
goto errout;
}
@@ -3007,7 +3073,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RTA_ENCAP_TYPE]) {
cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
- err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
+ err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
if (err < 0)
goto errout;
}
@@ -3048,17 +3114,11 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list,
struct rt6_info *rt, struct fib6_config *r_cfg)
{
struct rt6_nh *nh;
- struct rt6_info *rtnh;
int err = -EEXIST;
list_for_each_entry(nh, rt6_nh_list, next) {
/* check if rt6_info already exists */
- rtnh = nh->rt6_info;
-
- if (rtnh->dst.dev == rt->dst.dev &&
- rtnh->rt6i_idev == rt->rt6i_idev &&
- ipv6_addr_equal(&rtnh->rt6i_gateway,
- &rt->rt6i_gateway))
+ if (rt6_duplicate_nexthop(nh->rt6_info, rt))
return err;
}
@@ -3098,7 +3158,8 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
-static int ip6_route_multipath_add(struct fib6_config *cfg)
+static int ip6_route_multipath_add(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct rt6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
@@ -3146,7 +3207,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
- rt = ip6_route_info_create(&r_cfg);
+ rt = ip6_route_info_create(&r_cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
@@ -3155,7 +3216,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
- dst_free(&rt->dst);
+ dst_release_immediate(&rt->dst);
goto cleanup;
}
@@ -3171,7 +3232,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
rt_last = nh->rt6_info;
- err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc);
+ err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
/* save reference to first route for notification */
if (!rt_notif && !err)
rt_notif = nh->rt6_info;
@@ -3213,13 +3274,13 @@ add_errout:
list_for_each_entry(nh, &rt6_nh_list, next) {
if (err_nh == nh)
break;
- ip6_route_del(&nh->r_cfg);
+ ip6_route_del(&nh->r_cfg, extack);
}
cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
if (nh->rt6_info)
- dst_free(&nh->rt6_info->dst);
+ dst_release_immediate(&nh->rt6_info->dst);
kfree(nh->mxc.mx);
list_del(&nh->next);
kfree(nh);
@@ -3228,7 +3289,8 @@ cleanup:
return err;
}
-static int ip6_route_multipath_del(struct fib6_config *cfg)
+static int ip6_route_multipath_del(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
@@ -3255,7 +3317,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg)
r_cfg.fc_flags |= RTF_GATEWAY;
}
}
- err = ip6_route_del(&r_cfg);
+ err = ip6_route_del(&r_cfg, extack);
if (err)
last_err = err;
@@ -3271,15 +3333,15 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config cfg;
int err;
- err = rtm_to_fib6_config(skb, nlh, &cfg);
+ err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
if (err < 0)
return err;
if (cfg.fc_mp)
- return ip6_route_multipath_del(&cfg);
+ return ip6_route_multipath_del(&cfg, extack);
else {
cfg.fc_delete_all_nh = 1;
- return ip6_route_del(&cfg);
+ return ip6_route_del(&cfg, extack);
}
}
@@ -3289,14 +3351,14 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config cfg;
int err;
- err = rtm_to_fib6_config(skb, nlh, &cfg);
+ err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
if (err < 0)
return err;
if (cfg.fc_mp)
- return ip6_route_multipath_add(&cfg);
+ return ip6_route_multipath_add(&cfg, extack);
else
- return ip6_route_add(&cfg);
+ return ip6_route_add(&cfg, extack);
}
static size_t rt6_nlmsg_size(struct rt6_info *rt)
@@ -3343,6 +3405,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
+ *flags |= RTNH_F_OFFLOAD;
+
/* not needed for multipath encoding b/c it has a rtnexthop struct */
if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@ -3440,14 +3505,6 @@ static int rt6_fill_node(struct net *net,
rtm->rtm_flags = 0;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = rt->rt6i_protocol;
- if (rt->rt6i_flags & RTF_DYNAMIC)
- rtm->rtm_protocol = RTPROT_REDIRECT;
- else if (rt->rt6i_flags & RTF_ADDRCONF) {
- if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
- rtm->rtm_protocol = RTPROT_RA;
- else
- rtm->rtm_protocol = RTPROT_KERNEL;
- }
if (rt->rt6i_flags & RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
@@ -3577,11 +3634,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[RTA_MAX+1];
+ int err, iif = 0, oif = 0;
+ struct dst_entry *dst;
struct rt6_info *rt;
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6;
- int err, iif = 0, oif = 0;
+ bool fibmatch;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
extack);
@@ -3592,6 +3651,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
memset(&fl6, 0, sizeof(fl6));
rtm = nlmsg_data(nlh);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
+ fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
if (tb[RTA_SRC]) {
if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
@@ -3626,8 +3686,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct net_device *dev;
int flags = 0;
- dev = __dev_get_by_index(net, iif);
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, iif);
if (!dev) {
+ rcu_read_unlock();
err = -ENODEV;
goto errout;
}
@@ -3637,12 +3700,27 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
- flags);
+ if (!fibmatch)
+ dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ else
+ dst = ip6_route_lookup(net, &fl6, 0);
+
+ rcu_read_unlock();
} else {
fl6.flowi6_oif = oif;
- rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
+ if (!fibmatch)
+ dst = ip6_route_output(net, NULL, &fl6);
+ else
+ dst = ip6_route_lookup(net, &fl6, 0);
+ }
+
+
+ rt = container_of(dst, struct rt6_info, dst);
+ if (rt->dst.error) {
+ err = rt->dst.error;
+ ip6_rt_put(rt);
+ goto errout;
}
if (rt == net->ipv6.ip6_null_entry) {
@@ -3659,10 +3737,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
skb_dst_set(skb, &rt->dst);
-
- err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
+ if (fibmatch)
+ err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
+ RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ else
+ err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
+ RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -3727,10 +3809,10 @@ static int ip6_route_dev_notify(struct notifier_block *this,
/* NETDEV_UNREGISTER could be fired for multiple times by
* netdev_wait_allrefs(). Make sure we only call this once.
*/
- in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
+ in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
- in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
+ in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
+ in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
}
@@ -3924,6 +4006,7 @@ static int __net_init ip6_route_net_init(struct net *net)
ip6_template_metrics, true);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.fib6_has_custom_rules = false;
net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
sizeof(*net->ipv6.ip6_prohibit_entry),
GFP_KERNEL);
@@ -4099,9 +4182,10 @@ int __init ip6_route_init(void)
goto fib6_rules_init;
ret = -ENOBUFS;
- if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
- __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
- __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
+ if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
+ __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
+ __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED))
goto out_register_late_subsys;
ret = register_netdevice_notifier(&ip6_route_dev_notifier);