diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 172 |
1 files changed, 114 insertions, 58 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2a57566e6e91..acd69cfe2951 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -65,7 +65,7 @@ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> @@ -154,6 +154,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -168,6 +169,7 @@ static struct dst_ops ipv4_dst_ops = { .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, + .confirm_neigh = ipv4_confirm_neigh, }; #define ECN_OR_COST(class) TC_PRIO_##class @@ -461,6 +463,23 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + const __be32 *pkey = daddr; + const struct rtable *rt; + + rt = (const struct rtable *)dst; + if (rt->rt_gateway) + pkey = (const __be32 *)&rt->rt_gateway; + else if (!daddr || + (rt->rt_flags & + (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) + return; + + __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); +} + #define IP_IDENTS_SZ 2048u static atomic_t *ip_idents __read_mostly; @@ -507,7 +526,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(const struct net *net, struct flowi4 *fl4, + const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -523,19 +543,21 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, const struct sock *sk) { + const struct net *net = dev_net(skb->dev); const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) @@ -552,7 +574,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } @@ -795,6 +817,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf struct rtable *rt; struct flowi4 fl4; const struct iphdr *iph = (const struct iphdr *) skb->data; + struct net *net = dev_net(skb->dev); int oif = skb->dev->ifindex; u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; @@ -802,7 +825,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1020,7 +1043,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, if (!mark) mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1036,7 +1059,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); @@ -1055,6 +1078,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; + struct net *net = sock_net(sk); bh_lock_sock(sk); @@ -1068,7 +1092,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; } - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { @@ -1108,7 +1132,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1123,9 +1147,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; + struct net *net = sock_net(sk); - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - rt = __ip_route_output_key(sock_net(sk), &fl4); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); @@ -1598,6 +1623,19 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } +static void set_lwt_redirect(struct rtable *rth) +{ + if (lwtunnel_output_redirect(rth->dst.lwtstate)) { + rth->dst.lwtstate->orig_output = rth->dst.output; + rth->dst.output = lwtunnel_output; + } + + if (lwtunnel_input_redirect(rth->dst.lwtstate)) { + rth->dst.lwtstate->orig_input = rth->dst.input; + rth->dst.input = lwtunnel_input; + } +} + /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1687,14 +1725,7 @@ rt_cache: rth->dst.input = ip_forward; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); - if (lwtunnel_output_redirect(rth->dst.lwtstate)) { - rth->dst.lwtstate->orig_output = rth->dst.output; - rth->dst.output = lwtunnel_output; - } - if (lwtunnel_input_redirect(rth->dst.lwtstate)) { - rth->dst.lwtstate->orig_input = rth->dst.input; - rth->dst.input = lwtunnel_input; - } + set_lwt_redirect(rth); skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1746,7 +1777,6 @@ standard_hash: static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, - const struct flowi4 *fl4, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { @@ -1846,6 +1876,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; + fl4.flowi4_uid = sock_net_uid(net, NULL); err = fib_lookup(net, &fl4, &res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) @@ -1871,7 +1902,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos); out: return err; brd_input: @@ -1902,7 +1933,8 @@ local_input: } } - rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, + rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; @@ -1921,8 +1953,18 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } + if (do_cache) { - if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { + struct fib_nh *nh = &FIB_RES_NH(res); + + rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); + if (lwtunnel_input_redirect(rth->dst.lwtstate)) { + WARN_ON(rth->dst.input == lwtunnel_input); + rth->dst.lwtstate->orig_input = rth->dst.input; + rth->dst.input = lwtunnel_input; + } + + if (unlikely(!rt_cache_route(nh, rth))) { rth->dst.flags |= DST_NOCACHE; rt_add_uncached_list(rth); } @@ -1967,6 +2009,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, { int res; + tos &= IPTOS_RT_MASK; rcu_read_lock(); /* Multicast recognition logic is moved from route cache to here. @@ -1982,25 +2025,35 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, */ if (ipv4_is_multicast(daddr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); + int our = 0; - if (in_dev) { - int our = ip_check_mc_rcu(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); - if (our + if (in_dev) + our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); + + /* check l3 master if no match yet */ + if ((!in_dev || !our) && netif_is_l3_slave(dev)) { + struct in_device *l3_in_dev; + + l3_in_dev = __in_dev_get_rcu(skb->dev); + if (l3_in_dev) + our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, + ip_hdr(skb)->protocol); + } + + res = -EINVAL; + if (our #ifdef CONFIG_IP_MROUTE - || - (!ipv4_is_local_multicast(daddr) && - IN_DEV_MFORWARD(in_dev)) + || + (!ipv4_is_local_multicast(daddr) && + IN_DEV_MFORWARD(in_dev)) #endif - ) { - int res = ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); - rcu_read_unlock(); - return res; - } + ) { + res = ip_route_input_mc(skb, daddr, saddr, + tos, dev, our); } rcu_read_unlock(); - return -EINVAL; + return res; } res = ip_route_input_slow(skb, daddr, saddr, tos, dev); rcu_read_unlock(); @@ -2140,8 +2193,7 @@ add: } rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); - if (lwtunnel_output_redirect(rth->dst.lwtstate)) - rth->dst.output = lwtunnel_output; + set_lwt_redirect(rth); return rth; } @@ -2268,7 +2320,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif && - !netif_index_is_l3_master(net, fl4->flowi4_oif)) { + (ipv4_is_multicast(fl4->daddr) || + !netif_index_is_l3_master(net, fl4->flowi4_oif))) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2421,7 +2474,7 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow); static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, struct flowi4 *fl4, struct sk_buff *skb, u32 portid, - u32 seq, int event, int nowait, unsigned int flags) + u32 seq, int event) { struct rtable *rt = skb_rtable(skb); struct rtmsg *r; @@ -2430,7 +2483,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0); if (!nlh) return -EMSGSIZE; @@ -2439,7 +2492,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4->flowi4_tos; - r->rtm_table = table_id; + r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; @@ -2495,6 +2548,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2503,18 +2561,12 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, - r, nowait, portid); + r, portid); if (err <= 0) { - if (!nowait) { - if (err == 0) - return 0; - goto nla_put_failure; - } else { - if (err == -EMSGSIZE) - goto nla_put_failure; - error = err; - } + if (err == 0) + return 0; + goto nla_put_failure; } } else #endif @@ -2547,6 +2599,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int mark; struct sk_buff *skb; u32 table_id = RT_TABLE_MAIN; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2567,13 +2620,17 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) skb_reset_network_header(skb); /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ - ip_hdr(skb)->protocol = IPPROTO_ICMP; + ip_hdr(skb)->protocol = IPPROTO_UDP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2581,6 +2638,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (iif) { struct net_device *dev; @@ -2594,9 +2652,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) skb->protocol = htons(ETH_P_IP); skb->dev = dev; skb->mark = mark; - local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); - local_bh_enable(); rt = skb_rtable(skb); if (err == 0 && rt->dst.error) @@ -2621,7 +2677,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) err = rt_fill_info(net, dst, src, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - RTM_NEWROUTE, 0, 0); + RTM_NEWROUTE); if (err < 0) goto errout_free; |