summary refs log tree commit diff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/Kconfig 10
-rw-r--r-- net/ipv6/Makefile 7
-rw-r--r-- net/ipv6/addrconf.c 1002
-rw-r--r-- net/ipv6/af_inet6.c 24
-rw-r--r-- net/ipv6/calipso.c 1475
-rw-r--r-- net/ipv6/datagram.c 29
-rw-r--r-- net/ipv6/exthdrs.c 142
-rw-r--r-- net/ipv6/exthdrs_core.c 2
-rw-r--r-- net/ipv6/fib6_rules.c 6
-rw-r--r-- net/ipv6/fou6.c 140
-rw-r--r-- net/ipv6/icmp.c 120
-rw-r--r-- net/ipv6/ila/ila.h 80
-rw-r--r-- net/ipv6/ila/ila_common.c 83
-rw-r--r-- net/ipv6/ila/ila_lwt.c 54
-rw-r--r-- net/ipv6/ila/ila_xlat.c 167
-rw-r--r-- net/ipv6/inet6_hashtables.c 64
-rw-r--r-- net/ipv6/ip6_checksum.c 7
-rw-r--r-- net/ipv6/ip6_fib.c 2
-rw-r--r-- net/ipv6/ip6_flowlabel.c 7
-rw-r--r-- net/ipv6/ip6_gre.c 533
-rw-r--r-- net/ipv6/ip6_icmp.c 2
-rw-r--r-- net/ipv6/ip6_input.c 73
-rw-r--r-- net/ipv6/ip6_offload.c 94
-rw-r--r-- net/ipv6/ip6_offload.h 3
-rw-r--r-- net/ipv6/ip6_output.c 104
-rw-r--r-- net/ipv6/ip6_tunnel.c 450
-rw-r--r-- net/ipv6/ip6_vti.c 19
-rw-r--r-- net/ipv6/ip6mr.c 40
-rw-r--r-- net/ipv6/ipv6_sockglue.c 14
-rw-r--r-- net/ipv6/ndisc.c 123
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 578
-rw-r--r-- net/ipv6/netfilter/ip6t_SYNPROXY.c 58
-rw-r--r-- net/ipv6/netfilter/ip6table_mangle.c 4
-rw-r--r-- net/ipv6/netfilter/nf_dup_ipv6.c 1
-rw-r--r-- net/ipv6/netfilter/nf_reject_ipv6.c 2
-rw-r--r-- net/ipv6/netfilter/nft_chain_route_ipv6.c 10
-rw-r--r-- net/ipv6/netfilter/nft_reject_ipv6.c 1
-rw-r--r-- net/ipv6/ping.c 55
-rw-r--r-- net/ipv6/raw.c 38
-rw-r--r-- net/ipv6/reassembly.c 32
-rw-r--r-- net/ipv6/route.c 83
-rw-r--r-- net/ipv6/sit.c 158
-rw-r--r-- net/ipv6/syncookies.c 4
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c 19
-rw-r--r-- net/ipv6/tcp_ipv6.c 135
-rw-r--r-- net/ipv6/udp.c 444
-rw-r--r-- net/ipv6/udp_offload.c 24
-rw-r--r-- net/ipv6/udplite.c 1
-rw-r--r-- net/ipv6/xfrm6_input.c 15
-rw-r--r-- net/ipv6/xfrm6_policy.c 6
-rw-r--r-- net/ipv6/xfrm6_tunnel.c 2
51 files changed, 4266 insertions, 2280 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 11e875ffd7ac..2343e4f2e0bf 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -218,6 +218,7 @@ config IPV6_GRE
tristate "IPv6: GRE tunnel"
select IPV6_TUNNEL
select NET_IP_TUNNEL
+ depends on NET_IPGRE_DEMUX
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
@@ -231,6 +232,15 @@ config IPV6_GRE
Saying M here will produce a module called ip6_gre. If unsure, say N.
+config IPV6_FOU
+ tristate
+ default NET_FOU && IPV6
+
+config IPV6_FOU_TUNNEL
+ tristate
+ default NET_FOU_IP_TUNNELS && IPV6_FOU
+ select IPV6_TUNNEL
+
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
select FIB_RULES
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2fbd90bf8d33..c174ccb340a1 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,9 +8,10 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
- exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
+ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
+ udp_offload.o
-ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o
+ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o
ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o
@@ -21,6 +22,7 @@ ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
+ipv6-$(CONFIG_NETLABEL) += calipso.o
ipv6-objs += $(ipv6-y)
@@ -41,6 +43,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
+obj-$(CONFIG_IPV6_FOU) += fou6.o
obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8ec4b3089e20..2f1f5d439788 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -359,7 +359,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64;
ndev->cnf.mtu6 = dev->mtu;
- ndev->cnf.sysctl = NULL;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
if (!ndev->nd_parms) {
kfree(ndev);
@@ -548,7 +547,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_KERNEL);
if (!skb)
goto errout;
@@ -560,7 +559,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_KERNEL);
return;
errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err);
@@ -779,7 +778,14 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
}
if (p == &net->ipv6.devconf_all->forwarding) {
+ int old_dflt = net->ipv6.devconf_dflt->forwarding;
+
net->ipv6.devconf_dflt->forwarding = newf;
+ if ((!newf) ^ (!old_dflt))
+ inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
+
addrconf_forward_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
@@ -1525,6 +1531,28 @@ out:
return hiscore_idx;
}
+static int ipv6_get_saddr_master(struct net *net,
+ const struct net_device *dst_dev,
+ const struct net_device *master,
+ struct ipv6_saddr_dst *dst,
+ struct ipv6_saddr_score *scores,
+ int hiscore_idx)
+{
+ struct inet6_dev *idev;
+
+ idev = __in6_dev_get(dst_dev);
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
+ scores, hiscore_idx);
+
+ idev = __in6_dev_get(master);
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
+ scores, hiscore_idx);
+
+ return hiscore_idx;
+}
+
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
@@ -1578,13 +1606,39 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
if (idev)
hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
} else {
+ const struct net_device *master;
+ int master_idx = 0;
+
+ /* if dst_dev exists and is enslaved to an L3 device, then
+ * prefer addresses from dst_dev and then the master over
+ * any other enslaved devices in the L3 domain.
+ */
+ master = l3mdev_master_dev_rcu(dst_dev);
+ if (master) {
+ master_idx = master->ifindex;
+
+ hiscore_idx = ipv6_get_saddr_master(net, dst_dev,
+ master, &dst,
+ scores, hiscore_idx);
+
+ if (scores[hiscore_idx].ifa)
+ goto out;
+ }
+
for_each_netdev_rcu(net, dev) {
+ /* only consider addresses on devices in the
+ * same L3 domain
+ */
+ if (l3mdev_master_ifindex_rcu(dev) != master_idx)
+ continue;
idev = __in6_dev_get(dev);
if (!idev)
continue;
hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
}
}
+
+out:
rcu_read_unlock();
hiscore = &scores[hiscore_idx];
@@ -1825,7 +1879,6 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
void addrconf_dad_failure(struct inet6_ifaddr *ifp)
{
- struct in6_addr addr;
struct inet6_dev *idev = ifp->idev;
struct net *net = dev_net(ifp->idev->dev);
@@ -1887,18 +1940,6 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
in6_ifa_put(ifp2);
lock_errdad:
spin_lock_bh(&ifp->lock);
- } else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
- addr.s6_addr32[0] = htonl(0xfe800000);
- addr.s6_addr32[1] = 0;
-
- if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
- ipv6_addr_equal(&ifp->addr, &addr)) {
- /* DAD failed for link-local based on MAC address */
- idev->cnf.disable_ipv6 = 1;
-
- pr_info("%s: IPv6 being disabled!\n",
- ifp->idev->dev->name);
- }
}
errdad:
@@ -1907,6 +1948,7 @@ errdad:
spin_unlock_bh(&ifp->lock);
addrconf_mod_dad_work(ifp, 0);
+ in6_ifa_put(ifp);
}
/* Join to solicited addr multicast group.
@@ -2255,7 +2297,7 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return ERR_PTR(-EACCES);
/* Add default multicast route */
- if (!(dev->flags & IFF_LOOPBACK))
+ if (!(dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev))
addrconf_add_mroute(dev);
return idev;
@@ -2334,12 +2376,109 @@ static bool is_addr_mode_generate_stable(struct inet6_dev *idev)
idev->addr_gen_mode == IN6_ADDR_GEN_MODE_RANDOM;
}
+int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
+ const struct prefix_info *pinfo,
+ struct inet6_dev *in6_dev,
+ const struct in6_addr *addr, int addr_type,
+ u32 addr_flags, bool sllao, bool tokenized,
+ __u32 valid_lft, u32 prefered_lft)
+{
+ struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
+ int create = 0, update_lft = 0;
+
+ if (!ifp && valid_lft) {
+ int max_addresses = in6_dev->cnf.max_addresses;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (in6_dev->cnf.optimistic_dad &&
+ !net->ipv6.devconf_all->forwarding && sllao)
+ addr_flags |= IFA_F_OPTIMISTIC;
+#endif
+
+ /* Do not allow to create too much of autoconfigured
+ * addresses; this would be too easy way to crash kernel.
+ */
+ if (!max_addresses ||
+ ipv6_count_addresses(in6_dev) < max_addresses)
+ ifp = ipv6_add_addr(in6_dev, addr, NULL,
+ pinfo->prefix_len,
+ addr_type&IPV6_ADDR_SCOPE_MASK,
+ addr_flags, valid_lft,
+ prefered_lft);
+
+ if (IS_ERR_OR_NULL(ifp))
+ return -1;
+
+ update_lft = 0;
+ create = 1;
+ spin_lock_bh(&ifp->lock);
+ ifp->flags |= IFA_F_MANAGETEMPADDR;
+ ifp->cstamp = jiffies;
+ ifp->tokenized = tokenized;
+ spin_unlock_bh(&ifp->lock);
+ addrconf_dad_start(ifp);
+ }
+
+ if (ifp) {
+ u32 flags;
+ unsigned long now;
+ u32 stored_lft;
+
+ /* update lifetime (RFC2462 5.5.3 e) */
+ spin_lock_bh(&ifp->lock);
+ now = jiffies;
+ if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+ stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
+ else
+ stored_lft = 0;
+ if (!update_lft && !create && stored_lft) {
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
+ }
+
+ if (update_lft) {
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = now;
+ flags = ifp->flags;
+ ifp->flags &= ~IFA_F_DEPRECATED;
+ spin_unlock_bh(&ifp->lock);
+
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ifp);
+ } else
+ spin_unlock_bh(&ifp->lock);
+
+ manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
+ create, now);
+
+ in6_ifa_put(ifp);
+ addrconf_verify();
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
+
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
__u32 valid_lft;
__u32 prefered_lft;
- int addr_type;
+ int addr_type, err;
u32 addr_flags = 0;
struct inet6_dev *in6_dev;
struct net *net = dev_net(dev);
@@ -2433,10 +2572,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
/* Try to figure out our local address for this prefix */
if (pinfo->autoconf && in6_dev->cnf.autoconf) {
- struct inet6_ifaddr *ifp;
struct in6_addr addr;
- int create = 0, update_lft = 0;
- bool tokenized = false;
+ bool tokenized = false, dev_addr_generated = false;
if (pinfo->prefix_len == 64) {
memcpy(&addr, &pinfo->prefix, 8);
@@ -2454,106 +2591,36 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
goto ok;
} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
- in6_dev_put(in6_dev);
- return;
+ goto put;
+ } else {
+ dev_addr_generated = true;
}
goto ok;
}
net_dbg_ratelimited("IPv6 addrconf: prefix with wrong length %d\n",
pinfo->prefix_len);
- in6_dev_put(in6_dev);
- return;
+ goto put;
ok:
+ err = addrconf_prefix_rcv_add_addr(net, dev, pinfo, in6_dev,
+ &addr, addr_type,
+ addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft);
+ if (err)
+ goto put;
- ifp = ipv6_get_ifaddr(net, &addr, dev, 1);
-
- if (!ifp && valid_lft) {
- int max_addresses = in6_dev->cnf.max_addresses;
-
-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if (in6_dev->cnf.optimistic_dad &&
- !net->ipv6.devconf_all->forwarding && sllao)
- addr_flags |= IFA_F_OPTIMISTIC;
-#endif
-
- /* Do not allow to create too much of autoconfigured
- * addresses; this would be too easy way to crash kernel.
- */
- if (!max_addresses ||
- ipv6_count_addresses(in6_dev) < max_addresses)
- ifp = ipv6_add_addr(in6_dev, &addr, NULL,
- pinfo->prefix_len,
- addr_type&IPV6_ADDR_SCOPE_MASK,
- addr_flags, valid_lft,
- prefered_lft);
-
- if (IS_ERR_OR_NULL(ifp)) {
- in6_dev_put(in6_dev);
- return;
- }
-
- update_lft = 0;
- create = 1;
- spin_lock_bh(&ifp->lock);
- ifp->flags |= IFA_F_MANAGETEMPADDR;
- ifp->cstamp = jiffies;
- ifp->tokenized = tokenized;
- spin_unlock_bh(&ifp->lock);
- addrconf_dad_start(ifp);
- }
-
- if (ifp) {
- u32 flags;
- unsigned long now;
- u32 stored_lft;
-
- /* update lifetime (RFC2462 5.5.3 e) */
- spin_lock_bh(&ifp->lock);
- now = jiffies;
- if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
- else
- stored_lft = 0;
- if (!update_lft && !create && stored_lft) {
- const u32 minimum_lft = min_t(u32,
- stored_lft, MIN_VALID_LIFETIME);
- valid_lft = max(valid_lft, minimum_lft);
-
- /* RFC4862 Section 5.5.3e:
- * "Note that the preferred lifetime of the
- * corresponding address is always reset to
- * the Preferred Lifetime in the received
- * Prefix Information option, regardless of
- * whether the valid lifetime is also reset or
- * ignored."
- *
- * So we should always update prefered_lft here.
- */
- update_lft = 1;
- }
-
- if (update_lft) {
- ifp->valid_lft = valid_lft;
- ifp->prefered_lft = prefered_lft;
- ifp->tstamp = now;
- flags = ifp->flags;
- ifp->flags &= ~IFA_F_DEPRECATED;
- spin_unlock_bh(&ifp->lock);
-
- if (!(flags&IFA_F_TENTATIVE))
- ipv6_ifa_notify(0, ifp);
- } else
- spin_unlock_bh(&ifp->lock);
-
- manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft,
- create, now);
-
- in6_ifa_put(ifp);
- addrconf_verify();
- }
+ /* Ignore error case here because previous prefix add addr was
+ * successful which will be notified.
+ */
+ ndisc_ops_prefix_rcv_add_addr(net, dev, pinfo, in6_dev, &addr,
+ addr_type, addr_flags, sllao,
+ tokenized, valid_lft,
+ prefered_lft,
+ dev_addr_generated);
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
+put:
in6_dev_put(in6_dev);
}
@@ -2948,8 +3015,8 @@ static void init_loopback(struct net_device *dev)
}
}
-static void addrconf_add_linklocal(struct inet6_dev *idev,
- const struct in6_addr *addr, u32 flags)
+void addrconf_add_linklocal(struct inet6_dev *idev,
+ const struct in6_addr *addr, u32 flags)
{
struct inet6_ifaddr *ifp;
u32 addr_flags = flags | IFA_F_PERMANENT;
@@ -2968,6 +3035,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev,
in6_ifa_put(ifp);
}
}
+EXPORT_SYMBOL_GPL(addrconf_add_linklocal);
static bool ipv6_reserved_interfaceid(struct in6_addr address)
{
@@ -3470,7 +3538,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
/* combine the user config with event to determine if permanent
* addresses are to be removed from address hash table
*/
- keep_addr = !(how || _keep_addr <= 0);
+ keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
/* Step 2: clear hash table */
for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3526,7 +3594,7 @@ restart:
/* re-combine the user config with event to determine if permanent
* addresses are to be removed from the interface list
*/
- keep_addr = (!how && _keep_addr > 0);
+ keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
@@ -3551,8 +3619,7 @@ restart:
state = ifa->state;
ifa->state = INET6_IFADDR_STATE_DEAD;
- list_del(&ifa->if_list);
- list_add(&ifa->if_list, &del_list);
+ list_move(&ifa->if_list, &del_list);
}
spin_unlock_bh(&ifa->lock);
@@ -3563,6 +3630,10 @@ restart:
if (state != INET6_IFADDR_STATE_DEAD) {
__ipv6_ifa_notify(RTM_DELADDR, ifa);
inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
+ } else {
+ if (idev->cnf.forwarding)
+ addrconf_leave_anycast(ifa);
+ addrconf_leave_solict(ifa->idev, &ifa->addr);
}
write_lock_bh(&idev->lock);
@@ -3745,6 +3816,7 @@ static void addrconf_dad_work(struct work_struct *w)
dad_work);
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
+ bool disable_ipv6 = false;
enum {
DAD_PROCESS,
@@ -3761,6 +3833,24 @@ static void addrconf_dad_work(struct work_struct *w)
} else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
+
+ if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+ !(ifp->flags & IFA_F_STABLE_PRIVACY)) {
+ struct in6_addr addr;
+
+ addr.s6_addr32[0] = htonl(0xfe800000);
+ addr.s6_addr32[1] = 0;
+
+ if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+ ipv6_addr_equal(&ifp->addr, &addr)) {
+ /* DAD failed for link-local based on MAC */
+ idev->cnf.disable_ipv6 = 1;
+
+ pr_info("%s: IPv6 being disabled!\n",
+ ifp->idev->dev->name);
+ disable_ipv6 = true;
+ }
+ }
}
spin_unlock_bh(&ifp->lock);
@@ -3768,7 +3858,10 @@ static void addrconf_dad_work(struct work_struct *w)
addrconf_dad_begin(ifp);
goto out;
} else if (action == DAD_ABORT) {
+ in6_ifa_hold(ifp);
addrconf_dad_stop(ifp, 1);
+ if (disable_ipv6)
+ addrconf_ifdown(idev->dev, 0);
goto out;
}
@@ -4995,15 +5088,13 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
{
struct inet6_ifaddr *ifp;
struct net_device *dev = idev->dev;
- bool update_rs = false;
+ bool clear_token, update_rs = false;
struct in6_addr ll_addr;
ASSERT_RTNL();
if (!token)
return -EINVAL;
- if (ipv6_addr_any(token))
- return -EINVAL;
if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
return -EINVAL;
if (!ipv6_accept_ra(idev))
@@ -5018,10 +5109,13 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
write_unlock_bh(&idev->lock);
+ clear_token = ipv6_addr_any(token);
+ if (clear_token)
+ goto update_lft;
+
if (!idev->dead && (idev->if_flags & IF_READY) &&
!ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
IFA_F_OPTIMISTIC)) {
-
/* If we're not ready, then normal ifup will take care
* of this. Otherwise, we need to request our rs here.
*/
@@ -5029,6 +5123,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
update_rs = true;
}
+update_lft:
write_lock_bh(&idev->lock);
if (update_rs) {
@@ -5618,376 +5713,373 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
return ret;
}
-static struct addrconf_sysctl_table
-{
- struct ctl_table_header *sysctl_header;
- struct ctl_table addrconf_vars[DEVCONF_MAX+1];
-} addrconf_sysctl __read_mostly = {
- .sysctl_header = NULL,
- .addrconf_vars = {
- {
- .procname = "forwarding",
- .data = &ipv6_devconf.forwarding,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = addrconf_sysctl_forward,
- },
- {
- .procname = "hop_limit",
- .data = &ipv6_devconf.hop_limit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = addrconf_sysctl_hop_limit,
- },
- {
- .procname = "mtu",
- .data = &ipv6_devconf.mtu6,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = addrconf_sysctl_mtu,
- },
- {
- .procname = "accept_ra",
- .data = &ipv6_devconf.accept_ra,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "accept_redirects",
- .data = &ipv6_devconf.accept_redirects,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "autoconf",
- .data = &ipv6_devconf.autoconf,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "dad_transmits",
- .data = &ipv6_devconf.dad_transmits,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "router_solicitations",
- .data = &ipv6_devconf.rtr_solicits,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "router_solicitation_interval",
- .data = &ipv6_devconf.rtr_solicit_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "router_solicitation_delay",
- .data = &ipv6_devconf.rtr_solicit_delay,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "force_mld_version",
- .data = &ipv6_devconf.force_mld_version,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "mldv1_unsolicited_report_interval",
- .data =
- &ipv6_devconf.mldv1_unsolicited_report_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- },
- {
- .procname = "mldv2_unsolicited_report_interval",
- .data =
- &ipv6_devconf.mldv2_unsolicited_report_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- },
- {
- .procname = "use_tempaddr",
- .data = &ipv6_devconf.use_tempaddr,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "temp_valid_lft",
- .data = &ipv6_devconf.temp_valid_lft,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "temp_prefered_lft",
- .data = &ipv6_devconf.temp_prefered_lft,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "regen_max_retry",
- .data = &ipv6_devconf.regen_max_retry,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "max_desync_factor",
- .data = &ipv6_devconf.max_desync_factor,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "max_addresses",
- .data = &ipv6_devconf.max_addresses,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "accept_ra_defrtr",
- .data = &ipv6_devconf.accept_ra_defrtr,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "accept_ra_min_hop_limit",
- .data = &ipv6_devconf.accept_ra_min_hop_limit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "accept_ra_pinfo",
- .data = &ipv6_devconf.accept_ra_pinfo,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
+static const struct ctl_table addrconf_sysctl[] = {
+ {
+ .procname = "forwarding",
+ .data = &ipv6_devconf.forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_forward,
+ },
+ {
+ .procname = "hop_limit",
+ .data = &ipv6_devconf.hop_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_hop_limit,
+ },
+ {
+ .procname = "mtu",
+ .data = &ipv6_devconf.mtu6,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_mtu,
+ },
+ {
+ .procname = "accept_ra",
+ .data = &ipv6_devconf.accept_ra,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_redirects",
+ .data = &ipv6_devconf.accept_redirects,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "autoconf",
+ .data = &ipv6_devconf.autoconf,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "dad_transmits",
+ .data = &ipv6_devconf.dad_transmits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "router_solicitations",
+ .data = &ipv6_devconf.rtr_solicits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "router_solicitation_interval",
+ .data = &ipv6_devconf.rtr_solicit_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "router_solicitation_delay",
+ .data = &ipv6_devconf.rtr_solicit_delay,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "force_mld_version",
+ .data = &ipv6_devconf.force_mld_version,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "mldv1_unsolicited_report_interval",
+ .data =
+ &ipv6_devconf.mldv1_unsolicited_report_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ {
+ .procname = "mldv2_unsolicited_report_interval",
+ .data =
+ &ipv6_devconf.mldv2_unsolicited_report_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ {
+ .procname = "use_tempaddr",
+ .data = &ipv6_devconf.use_tempaddr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "temp_valid_lft",
+ .data = &ipv6_devconf.temp_valid_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "temp_prefered_lft",
+ .data = &ipv6_devconf.temp_prefered_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "regen_max_retry",
+ .data = &ipv6_devconf.regen_max_retry,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "max_desync_factor",
+ .data = &ipv6_devconf.max_desync_factor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "max_addresses",
+ .data = &ipv6_devconf.max_addresses,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_ra_defrtr",
+ .data = &ipv6_devconf.accept_ra_defrtr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_ra_min_hop_limit",
+ .data = &ipv6_devconf.accept_ra_min_hop_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_ra_pinfo",
+ .data = &ipv6_devconf.accept_ra_pinfo,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IPV6_ROUTER_PREF
- {
- .procname = "accept_ra_rtr_pref",
- .data = &ipv6_devconf.accept_ra_rtr_pref,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "router_probe_interval",
- .data = &ipv6_devconf.rtr_probe_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
+ {
+ .procname = "accept_ra_rtr_pref",
+ .data = &ipv6_devconf.accept_ra_rtr_pref,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "router_probe_interval",
+ .data = &ipv6_devconf.rtr_probe_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
#ifdef CONFIG_IPV6_ROUTE_INFO
- {
- .procname = "accept_ra_rt_info_max_plen",
- .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
+ {
+ .procname = "accept_ra_rt_info_max_plen",
+ .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#endif
#endif
- {
- .procname = "proxy_ndp",
- .data = &ipv6_devconf.proxy_ndp,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = addrconf_sysctl_proxy_ndp,
- },
- {
- .procname = "accept_source_route",
- .data = &ipv6_devconf.accept_source_route,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
+ {
+ .procname = "proxy_ndp",
+ .data = &ipv6_devconf.proxy_ndp,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_proxy_ndp,
+ },
+ {
+ .procname = "accept_source_route",
+ .data = &ipv6_devconf.accept_source_route,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- {
- .procname = "optimistic_dad",
- .data = &ipv6_devconf.optimistic_dad,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
-
- },
- {
- .procname = "use_optimistic",
- .data = &ipv6_devconf.use_optimistic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
-
- },
+ {
+ .procname = "optimistic_dad",
+ .data = &ipv6_devconf.optimistic_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "use_optimistic",
+ .data = &ipv6_devconf.use_optimistic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#endif
#ifdef CONFIG_IPV6_MROUTE
- {
- .procname = "mc_forwarding",
- .data = &ipv6_devconf.mc_forwarding,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
+ {
+ .procname = "mc_forwarding",
+ .data = &ipv6_devconf.mc_forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
#endif
- {
- .procname = "disable_ipv6",
- .data = &ipv6_devconf.disable_ipv6,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = addrconf_sysctl_disable,
- },
- {
- .procname = "accept_dad",
- .data = &ipv6_devconf.accept_dad,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "force_tllao",
- .data = &ipv6_devconf.force_tllao,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "ndisc_notify",
- .data = &ipv6_devconf.ndisc_notify,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "suppress_frag_ndisc",
- .data = &ipv6_devconf.suppress_frag_ndisc,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "accept_ra_from_local",
- .data = &ipv6_devconf.accept_ra_from_local,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "accept_ra_mtu",
- .data = &ipv6_devconf.accept_ra_mtu,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "stable_secret",
- .data = &ipv6_devconf.stable_secret,
- .maxlen = IPV6_MAX_STRLEN,
- .mode = 0600,
- .proc_handler = addrconf_sysctl_stable_secret,
- },
- {
- .procname = "use_oif_addrs_only",
- .data = &ipv6_devconf.use_oif_addrs_only,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "ignore_routes_with_linkdown",
- .data = &ipv6_devconf.ignore_routes_with_linkdown,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
- },
- {
- .procname = "drop_unicast_in_l2_multicast",
- .data = &ipv6_devconf.drop_unicast_in_l2_multicast,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "drop_unsolicited_na",
- .data = &ipv6_devconf.drop_unsolicited_na,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "keep_addr_on_down",
- .data = &ipv6_devconf.keep_addr_on_down,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
-
- },
- {
- /* sentinel */
- }
+ {
+ .procname = "disable_ipv6",
+ .data = &ipv6_devconf.disable_ipv6,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_disable,
+ },
+ {
+ .procname = "accept_dad",
+ .data = &ipv6_devconf.accept_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "force_tllao",
+ .data = &ipv6_devconf.force_tllao,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "ndisc_notify",
+ .data = &ipv6_devconf.ndisc_notify,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "suppress_frag_ndisc",
+ .data = &ipv6_devconf.suppress_frag_ndisc,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "accept_ra_from_local",
+ .data = &ipv6_devconf.accept_ra_from_local,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "accept_ra_mtu",
+ .data = &ipv6_devconf.accept_ra_mtu,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "stable_secret",
+ .data = &ipv6_devconf.stable_secret,
+ .maxlen = IPV6_MAX_STRLEN,
+ .mode = 0600,
+ .proc_handler = addrconf_sysctl_stable_secret,
+ },
+ {
+ .procname = "use_oif_addrs_only",
+ .data = &ipv6_devconf.use_oif_addrs_only,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
+ {
+ .procname = "ignore_routes_with_linkdown",
+ .data = &ipv6_devconf.ignore_routes_with_linkdown,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
+ },
+ {
+ .procname = "drop_unicast_in_l2_multicast",
+ .data = &ipv6_devconf.drop_unicast_in_l2_multicast,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "drop_unsolicited_na",
+ .data = &ipv6_devconf.drop_unsolicited_na,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "keep_addr_on_down",
+ .data = &ipv6_devconf.keep_addr_on_down,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+
+ },
+ {
+ /* sentinel */
+ }
};
static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct inet6_dev *idev, struct ipv6_devconf *p)
{
- int i;
- struct addrconf_sysctl_table *t;
+ int i, ifindex;
+ struct ctl_table *table;
char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
- t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
- if (!t)
+ table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL);
+ if (!table)
goto out;
- for (i = 0; t->addrconf_vars[i].data; i++) {
- t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf;
- t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
- t->addrconf_vars[i].extra2 = net;
+ for (i = 0; table[i].data; i++) {
+ table[i].data += (char *)p - (char *)&ipv6_devconf;
+ table[i].extra1 = idev; /* embedded; no ref */
+ table[i].extra2 = net;
}
snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
- t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars);
- if (!t->sysctl_header)
+ p->sysctl_header = register_net_sysctl(net, path, table);
+ if (!p->sysctl_header)
goto free;
- p->sysctl = t;
+ if (!strcmp(dev_name, "all"))
+ ifindex = NETCONFA_IFINDEX_ALL;
+ else if (!strcmp(dev_name, "default"))
+ ifindex = NETCONFA_IFINDEX_DEFAULT;
+ else
+ ifindex = idev->dev->ifindex;
+ inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
return 0;
free:
- kfree(t);
+ kfree(table);
out:
return -ENOBUFS;
}
static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
{
- struct addrconf_sysctl_table *t;
+ struct ctl_table *table;
- if (!p->sysctl)
+ if (!p->sysctl_header)
return;
- t = p->sysctl;
- p->sysctl = NULL;
- unregister_net_sysctl_table(t->sysctl_header);
- kfree(t);
+ table = p->sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(p->sysctl_header);
+ p->sysctl_header = NULL;
+ kfree(table);
}
static int addrconf_sysctl_register(struct inet6_dev *idev)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b11c37cfd67c..b454055ba625 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -60,10 +60,13 @@
#ifdef CONFIG_IPV6_TUNNEL
#include <net/ip6_tunnel.h>
#endif
+#include <net/calipso.h>
#include <asm/uaccess.h>
#include <linux/mroute6.h>
+#include "ip6_offload.h"
+
MODULE_AUTHOR("Cast of dozens");
MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
MODULE_LICENSE("GPL");
@@ -90,6 +93,12 @@ MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces");
module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444);
MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces");
+bool ipv6_mod_enabled(void)
+{
+ return disable_ipv6_mod == 0;
+}
+EXPORT_SYMBOL_GPL(ipv6_mod_enabled);
+
static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
{
const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
@@ -561,6 +570,7 @@ const struct proto_ops inet6_dgram_ops = {
.recvmsg = inet_recvmsg, /* ok */
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+ .set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
@@ -958,6 +968,10 @@ static int __init inet6_init(void)
if (err)
goto udplitev6_fail;
+ err = udpv6_offload_init();
+ if (err)
+ goto udpv6_offload_fail;
+
err = tcpv6_init();
if (err)
goto tcpv6_fail;
@@ -970,6 +984,10 @@ static int __init inet6_init(void)
if (err)
goto pingv6_fail;
+ err = calipso_init();
+ if (err)
+ goto calipso_fail;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -980,13 +998,17 @@ out:
#ifdef CONFIG_SYSCTL
sysctl_fail:
- pingv6_exit();
+ calipso_exit();
#endif
+calipso_fail:
+ pingv6_exit();
pingv6_fail:
ipv6_packet_cleanup();
ipv6_packet_fail:
tcpv6_exit();
tcpv6_fail:
+ udpv6_offload_exit();
+udpv6_offload_fail:
udplitev6_exit();
udplitev6_fail:
udpv6_exit();
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
new file mode 100644
index 000000000000..37ac9de713c6
--- /dev/null
+++ b/net/ipv6/calipso.c
@@ -0,0 +1,1475 @@
+/*
+ * CALIPSO - Common Architecture Label IPv6 Security Option
+ *
+ * This is an implementation of the CALIPSO protocol as specified in
+ * RFC 5570.
+ *
+ * Authors: Paul Moore <paul.moore@hp.com>
+ * Huw Davies <huw@codeweavers.com>
+ *
+ */
+
+/* (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
+ * (c) Copyright Huw Davies <huw@codeweavers.com>, 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+#include <linux/audit.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/netlabel.h>
+#include <net/calipso.h>
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <asm/unaligned.h>
+#include <linux/crc-ccitt.h>
+
+/* Maximum size of the calipso option including
+ * the two-byte TLV header.
+ */
+#define CALIPSO_OPT_LEN_MAX (2 + 252)
+
+/* Size of the minimum calipso option including
+ * the two-byte TLV header.
+ */
+#define CALIPSO_HDR_LEN (2 + 8)
+
+/* Maximum size of the calipso option including
+ * the two-byte TLV header and up to 3 bytes of
+ * leading pad and 7 bytes of trailing pad.
+ */
+#define CALIPSO_OPT_LEN_MAX_WITH_PAD (3 + CALIPSO_OPT_LEN_MAX + 7)
+
+ /* Maximum size of u32 aligned buffer required to hold calipso
+ * option. Max of 3 initial pad bytes starting from buffer + 3.
+ * i.e. the worst case is when the previous tlv finishes on 4n + 3.
+ */
+#define CALIPSO_MAX_BUFFER (6 + CALIPSO_OPT_LEN_MAX)
+
+/* List of available DOI definitions */
+static DEFINE_SPINLOCK(calipso_doi_list_lock);
+static LIST_HEAD(calipso_doi_list);
+
+/* Label mapping cache */
+int calipso_cache_enabled = 1;
+int calipso_cache_bucketsize = 10;
+#define CALIPSO_CACHE_BUCKETBITS 7
+#define CALIPSO_CACHE_BUCKETS BIT(CALIPSO_CACHE_BUCKETBITS)
+#define CALIPSO_CACHE_REORDERLIMIT 10
+struct calipso_map_cache_bkt {
+ spinlock_t lock;
+ u32 size;
+ struct list_head list;
+};
+
+struct calipso_map_cache_entry {
+ u32 hash;
+ unsigned char *key;
+ size_t key_len;
+
+ struct netlbl_lsm_cache *lsm_data;
+
+ u32 activity;
+ struct list_head list;
+};
+
+static struct calipso_map_cache_bkt *calipso_cache;
+
+/* Label Mapping Cache Functions
+ */
+
+/**
+ * calipso_cache_entry_free - Frees a cache entry
+ * @entry: the entry to free
+ *
+ * Description:
+ * This function frees the memory associated with a cache entry including the
+ * LSM cache data if there are no longer any users, i.e. reference count == 0.
+ *
+ */
+static void calipso_cache_entry_free(struct calipso_map_cache_entry *entry)
+{
+ if (entry->lsm_data)
+ netlbl_secattr_cache_free(entry->lsm_data);
+ kfree(entry->key);
+ kfree(entry);
+}
+
+/**
+ * calipso_map_cache_hash - Hashing function for the CALIPSO cache
+ * @key: the hash key
+ * @key_len: the length of the key in bytes
+ *
+ * Description:
+ * The CALIPSO tag hashing function. Returns a 32-bit hash value.
+ *
+ */
+static u32 calipso_map_cache_hash(const unsigned char *key, u32 key_len)
+{
+ return jhash(key, key_len, 0);
+}
+
+/**
+ * calipso_cache_init - Initialize the CALIPSO cache
+ *
+ * Description:
+ * Initializes the CALIPSO label mapping cache, this function should be called
+ * before any of the other functions defined in this file. Returns zero on
+ * success, negative values on error.
+ *
+ */
+static int __init calipso_cache_init(void)
+{
+ u32 iter;
+
+ calipso_cache = kcalloc(CALIPSO_CACHE_BUCKETS,
+ sizeof(struct calipso_map_cache_bkt),
+ GFP_KERNEL);
+ if (!calipso_cache)
+ return -ENOMEM;
+
+ for (iter = 0; iter < CALIPSO_CACHE_BUCKETS; iter++) {
+ spin_lock_init(&calipso_cache[iter].lock);
+ calipso_cache[iter].size = 0;
+ INIT_LIST_HEAD(&calipso_cache[iter].list);
+ }
+
+ return 0;
+}
+
+/**
+ * calipso_cache_invalidate - Invalidates the current CALIPSO cache
+ *
+ * Description:
+ * Invalidates and frees any entries in the CALIPSO cache. Each bucket is
+ * emptied while holding that bucket's lock; there is no return value.
+ *
+ */
+static void calipso_cache_invalidate(void)
+{
+ struct calipso_map_cache_entry *entry, *tmp_entry;
+ u32 iter;
+
+ for (iter = 0; iter < CALIPSO_CACHE_BUCKETS; iter++) {
+ spin_lock_bh(&calipso_cache[iter].lock);
+ list_for_each_entry_safe(entry,
+ tmp_entry,
+ &calipso_cache[iter].list, list) {
+ list_del(&entry->list);
+ calipso_cache_entry_free(entry);
+ }
+ calipso_cache[iter].size = 0;
+ spin_unlock_bh(&calipso_cache[iter].lock);
+ }
+}
+
+/**
+ * calipso_cache_check - Check the CALIPSO cache for a label mapping
+ * @key: the buffer to check
+ * @key_len: buffer length in bytes
+ * @secattr: the security attribute struct to use
+ *
+ * Description:
+ * This function checks the cache to see if a label mapping already exists for
+ * the given key. If there is a match then the cache is adjusted and the
+ * @secattr struct is populated with the correct LSM security attributes. The
+ * cache is adjusted in the following manner if the entry is not already the
+ * first in the cache bucket:
+ *
+ * 1. The cache entry's activity counter is incremented
+ * 2. The previous (higher ranking) entry's activity counter is decremented
+ * 3. If the difference between the two activity counters is greater than
+ * CALIPSO_CACHE_REORDERLIMIT the two entries are swapped
+ *
+ * Returns zero on success, -ENOENT for a cache miss, and other negative values
+ * on error.
+ *
+ */
+static int calipso_cache_check(const unsigned char *key,
+ u32 key_len,
+ struct netlbl_lsm_secattr *secattr)
+{
+ u32 bkt;
+ struct calipso_map_cache_entry *entry;
+ struct calipso_map_cache_entry *prev_entry = NULL;
+ u32 hash;
+
+ if (!calipso_cache_enabled)
+ return -ENOENT;
+
+ hash = calipso_map_cache_hash(key, key_len);
+ bkt = hash & (CALIPSO_CACHE_BUCKETS - 1);
+ spin_lock_bh(&calipso_cache[bkt].lock);
+ list_for_each_entry(entry, &calipso_cache[bkt].list, list) {
+ if (entry->hash == hash &&
+ entry->key_len == key_len &&
+ memcmp(entry->key, key, key_len) == 0) {
+ entry->activity += 1;
+ atomic_inc(&entry->lsm_data->refcount);
+ secattr->cache = entry->lsm_data;
+ secattr->flags |= NETLBL_SECATTR_CACHE;
+ secattr->type = NETLBL_NLTYPE_CALIPSO;
+ if (!prev_entry) {
+ spin_unlock_bh(&calipso_cache[bkt].lock);
+ return 0;
+ }
+
+ if (prev_entry->activity > 0)
+ prev_entry->activity -= 1;
+ if (entry->activity > prev_entry->activity &&
+ entry->activity - prev_entry->activity >
+ CALIPSO_CACHE_REORDERLIMIT) {
+ __list_del(entry->list.prev, entry->list.next);
+ __list_add(&entry->list,
+ prev_entry->list.prev,
+ &prev_entry->list);
+ }
+
+ spin_unlock_bh(&calipso_cache[bkt].lock);
+ return 0;
+ }
+ prev_entry = entry;
+ }
+ spin_unlock_bh(&calipso_cache[bkt].lock);
+
+ return -ENOENT;
+}
+
+/**
+ * calipso_cache_add - Add an entry to the CALIPSO cache
+ * @calipso_ptr: the CALIPSO option
+ * @secattr: the packet's security attributes
+ *
+ * Description:
+ * Add a new entry into the CALIPSO label mapping cache. Add the new entry to
+ * head of the cache bucket's list, if the cache bucket is out of room remove
+ * the last entry in the list first. It is important to note that there is
+ * currently no checking for duplicate keys. Returns zero on success,
+ * negative values on failure. The key stored starts at calipso_ptr + 2,
+ * i.e. the type and length bytes are not stored, this corresponds to
+ * calipso_ptr[1] bytes of data. The hash is computed over that stored key,
+ * matching what calipso_cache_check() hashes on lookup.
+ */
+static int calipso_cache_add(const unsigned char *calipso_ptr,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -EPERM;
+ u32 bkt;
+ struct calipso_map_cache_entry *entry = NULL;
+ struct calipso_map_cache_entry *old_entry = NULL;
+ u32 calipso_ptr_len;
+
+ if (!calipso_cache_enabled || calipso_cache_bucketsize <= 0)
+ return 0;
+
+ calipso_ptr_len = calipso_ptr[1];
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ return -ENOMEM;
+ entry->key = kmemdup(calipso_ptr + 2, calipso_ptr_len, GFP_ATOMIC);
+ if (!entry->key) {
+ ret_val = -ENOMEM;
+ goto cache_add_failure;
+ }
+ entry->key_len = calipso_ptr_len;
+ entry->hash = calipso_map_cache_hash(calipso_ptr + 2, calipso_ptr_len);
+ atomic_inc(&secattr->cache->refcount);
+ entry->lsm_data = secattr->cache;
+
+ bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1);
+ spin_lock_bh(&calipso_cache[bkt].lock);
+ if (calipso_cache[bkt].size < calipso_cache_bucketsize) {
+ list_add(&entry->list, &calipso_cache[bkt].list);
+ calipso_cache[bkt].size += 1;
+ } else {
+ old_entry = list_entry(calipso_cache[bkt].list.prev,
+ struct calipso_map_cache_entry, list);
+ list_del(&old_entry->list);
+ list_add(&entry->list, &calipso_cache[bkt].list);
+ calipso_cache_entry_free(old_entry);
+ }
+ spin_unlock_bh(&calipso_cache[bkt].lock);
+
+ return 0;
+
+cache_add_failure:
+ if (entry)
+ calipso_cache_entry_free(entry);
+ return ret_val;
+}
+
+/* DOI List Functions
+ */
+
+/**
+ * calipso_doi_search - Searches for a DOI definition
+ * @doi: the DOI to search for
+ *
+ * Description:
+ * Search the DOI definition list for a DOI definition with a DOI value that
+ * matches @doi. The caller is responsible for calling rcu_read_[un]lock().
+ * Returns a pointer to the DOI definition on success and NULL on failure.
+ */
+static struct calipso_doi *calipso_doi_search(u32 doi)
+{
+ struct calipso_doi *iter;
+
+ list_for_each_entry_rcu(iter, &calipso_doi_list, list)
+ if (iter->doi == doi && atomic_read(&iter->refcount))
+ return iter;
+ return NULL;
+}
+
+/**
+ * calipso_doi_add - Add a new DOI to the CALIPSO protocol engine
+ * @doi_def: the DOI structure
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * The caller defines a new DOI for use by the CALIPSO engine and calls this
+ * function to add it to the list of acceptable domains. The caller must
+ * ensure that the mapping table specified in @doi_def->map meets all of the
+ * requirements of the mapping type (see calipso.h for details). Returns
+ * zero on success and non-zero on failure.
+ *
+ */
+static int calipso_doi_add(struct calipso_doi *doi_def,
+ struct netlbl_audit *audit_info)
+{
+ int ret_val = -EINVAL;
+ u32 doi;
+ u32 doi_type;
+ struct audit_buffer *audit_buf;
+
+ doi = doi_def->doi;
+ doi_type = doi_def->type;
+
+ if (doi_def->doi == CALIPSO_DOI_UNKNOWN)
+ goto doi_add_return;
+
+ atomic_set(&doi_def->refcount, 1);
+
+ spin_lock(&calipso_doi_list_lock);
+ if (calipso_doi_search(doi_def->doi)) {
+ spin_unlock(&calipso_doi_list_lock);
+ ret_val = -EEXIST;
+ goto doi_add_return;
+ }
+ list_add_tail_rcu(&doi_def->list, &calipso_doi_list);
+ spin_unlock(&calipso_doi_list_lock);
+ ret_val = 0;
+
+doi_add_return:
+ audit_buf = netlbl_audit_start(AUDIT_MAC_CALIPSO_ADD, audit_info);
+ if (audit_buf) {
+ const char *type_str;
+
+ switch (doi_type) {
+ case CALIPSO_MAP_PASS:
+ type_str = "pass";
+ break;
+ default:
+ type_str = "(unknown)";
+ }
+ audit_log_format(audit_buf,
+ " calipso_doi=%u calipso_type=%s res=%u",
+ doi, type_str, ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+ }
+
+ return ret_val;
+}
+
+/**
+ * calipso_doi_free - Frees a DOI definition
+ * @doi_def: the DOI definition
+ *
+ * Description:
+ * This function frees all of the memory associated with a DOI definition.
+ *
+ */
+static void calipso_doi_free(struct calipso_doi *doi_def)
+{
+ kfree(doi_def);
+}
+
+/**
+ * calipso_doi_free_rcu - Frees a DOI definition via the RCU pointer
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void calipso_doi_free_rcu(struct rcu_head *entry)
+{
+ struct calipso_doi *doi_def;
+
+ doi_def = container_of(entry, struct calipso_doi, rcu);
+ calipso_doi_free(doi_def);
+}
+
+/**
+ * calipso_doi_remove - Remove an existing DOI from the CALIPSO protocol engine
+ * @doi: the DOI value
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes a DOI definition from the CALIPSO engine. The NetLabel routines will
+ * be called to release their own LSM domain mappings as well as our own
+ * domain list. Returns zero on success and negative values on failure.
+ *
+ */
+static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info)
+{
+ int ret_val;
+ struct calipso_doi *doi_def;
+ struct audit_buffer *audit_buf;
+
+ spin_lock(&calipso_doi_list_lock);
+ doi_def = calipso_doi_search(doi);
+ if (!doi_def) {
+ spin_unlock(&calipso_doi_list_lock);
+ ret_val = -ENOENT;
+ goto doi_remove_return;
+ }
+ if (!atomic_dec_and_test(&doi_def->refcount)) {
+ spin_unlock(&calipso_doi_list_lock);
+ ret_val = -EBUSY;
+ goto doi_remove_return;
+ }
+ list_del_rcu(&doi_def->list);
+ spin_unlock(&calipso_doi_list_lock);
+
+ call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
+ ret_val = 0;
+
+doi_remove_return:
+ audit_buf = netlbl_audit_start(AUDIT_MAC_CALIPSO_DEL, audit_info);
+ if (audit_buf) {
+ audit_log_format(audit_buf,
+ " calipso_doi=%u res=%u",
+ doi, ret_val == 0 ? 1 : 0);
+ audit_log_end(audit_buf);
+ }
+
+ return ret_val;
+}
+
+/**
+ * calipso_doi_getdef - Returns a reference to a valid DOI definition
+ * @doi: the DOI value
+ *
+ * Description:
+ * Searches for a valid DOI definition and if one is found it is returned to
+ * the caller. Otherwise NULL is returned. The caller must ensure that
+ * calipso_doi_putdef() is called when the caller is done.
+ *
+ */
+static struct calipso_doi *calipso_doi_getdef(u32 doi)
+{
+ struct calipso_doi *doi_def;
+
+ rcu_read_lock();
+ doi_def = calipso_doi_search(doi);
+ if (!doi_def)
+ goto doi_getdef_return;
+ if (!atomic_inc_not_zero(&doi_def->refcount))
+ doi_def = NULL;
+
+doi_getdef_return:
+ rcu_read_unlock();
+ return doi_def;
+}
+
+/**
+ * calipso_doi_putdef - Releases a reference for the given DOI definition
+ * @doi_def: the DOI definition
+ *
+ * Description:
+ * Releases a DOI definition reference obtained from calipso_doi_getdef().
+ *
+ */
+static void calipso_doi_putdef(struct calipso_doi *doi_def)
+{
+ if (!doi_def)
+ return;
+
+ if (!atomic_dec_and_test(&doi_def->refcount))
+ return;
+ spin_lock(&calipso_doi_list_lock);
+ list_del_rcu(&doi_def->list);
+ spin_unlock(&calipso_doi_list_lock);
+
+ call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
+}
+
+/**
+ * calipso_doi_walk - Iterate through the DOI definitions
+ * @skip_cnt: skip past this number of DOI definitions, updated
+ * @callback: callback for each DOI definition
+ * @cb_arg: argument for the callback function
+ *
+ * Description:
+ * Iterate over the DOI definition list, skipping the first @skip_cnt entries.
+ * For each entry call @callback, if @callback returns a negative value stop
+ * 'walking' through the list and return. Updates the value in @skip_cnt upon
+ * return. Returns zero on success, negative values on failure.
+ *
+ */
+static int calipso_doi_walk(u32 *skip_cnt,
+ int (*callback)(struct calipso_doi *doi_def,
+ void *arg),
+ void *cb_arg)
+{
+ int ret_val = -ENOENT;
+ u32 doi_cnt = 0;
+ struct calipso_doi *iter_doi;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(iter_doi, &calipso_doi_list, list)
+ if (atomic_read(&iter_doi->refcount) > 0) {
+ if (doi_cnt++ < *skip_cnt)
+ continue;
+ ret_val = callback(iter_doi, cb_arg);
+ if (ret_val < 0) {
+ doi_cnt--;
+ goto doi_walk_return;
+ }
+ }
+
+doi_walk_return:
+ rcu_read_unlock();
+ *skip_cnt = doi_cnt;
+ return ret_val;
+}
+
+/**
+ * calipso_validate - Validate a CALIPSO option
+ * @skb: the packet
+ * @option: the start of the option
+ *
+ * Description:
+ * This routine is called to validate a CALIPSO option.
+ * If the option is valid then %true is returned, otherwise
+ * %false is returned.
+ *
+ * The caller should have already checked that the length of the
+ * option (including the TLV header) is >= 10 and that the catmap
+ * length is consistent with the option length.
+ *
+ * We leave checks on the level and categories to the socket layer.
+ */
+bool calipso_validate(const struct sk_buff *skb, const unsigned char *option)
+{
+ struct calipso_doi *doi_def;
+ bool ret_val;
+ u16 crc, len = option[1] + 2;
+ static const u8 zero[2];
+
+ /* The original CRC runs over the option including the TLV header
+ * with the CRC-16 field (at offset 8) zeroed out. */
+ crc = crc_ccitt(0xffff, option, 8);
+ crc = crc_ccitt(crc, zero, sizeof(zero));
+ if (len > 10)
+ crc = crc_ccitt(crc, option + 10, len - 10);
+ crc = ~crc;
+ if (option[8] != (crc & 0xff) || option[9] != ((crc >> 8) & 0xff))
+ return false;
+
+ rcu_read_lock();
+ doi_def = calipso_doi_search(get_unaligned_be32(option + 2));
+ ret_val = !!doi_def;
+ rcu_read_unlock();
+
+ return ret_val;
+}
+
+/**
+ * calipso_map_cat_hton - Perform a category mapping from host to network
+ * @doi_def: the DOI definition
+ * @secattr: the security attributes
+ * @net_cat: the zero'd out category bitmap in network/CALIPSO format
+ * @net_cat_len: the length of the CALIPSO bitmap in bytes
+ *
+ * Description:
+ * Perform a label mapping to translate a local MLS category bitmap to the
+ * correct CALIPSO bitmap using the given DOI definition. Returns the minimum
+ * size in bytes of the network bitmap on success, negative values otherwise.
+ *
+ */
+static int calipso_map_cat_hton(const struct calipso_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr,
+ unsigned char *net_cat,
+ u32 net_cat_len)
+{
+ int spot = -1;
+ u32 net_spot_max = 0;
+ u32 net_clen_bits = net_cat_len * 8;
+
+ for (;;) {
+ spot = netlbl_catmap_walk(secattr->attr.mls.cat,
+ spot + 1);
+ if (spot < 0)
+ break;
+ if (spot >= net_clen_bits)
+ return -ENOSPC;
+ netlbl_bitmap_setbit(net_cat, spot, 1);
+
+ if (spot > net_spot_max)
+ net_spot_max = spot;
+ }
+
+ return (net_spot_max / 32 + 1) * 4;
+}
+
+/**
+ * calipso_map_cat_ntoh - Perform a category mapping from network to host
+ * @doi_def: the DOI definition
+ * @net_cat: the category bitmap in network/CALIPSO format
+ * @net_cat_len: the length of the CALIPSO bitmap in bytes
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Perform a label mapping to translate a CALIPSO bitmap to the correct local
+ * MLS category bitmap using the given DOI definition. Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int calipso_map_cat_ntoh(const struct calipso_doi *doi_def,
+ const unsigned char *net_cat,
+ u32 net_cat_len,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ int spot = -1;
+ u32 net_clen_bits = net_cat_len * 8;
+
+ for (;;) {
+ spot = netlbl_bitmap_walk(net_cat,
+ net_clen_bits,
+ spot + 1,
+ 1);
+ if (spot < 0) {
+ if (spot == -2)
+ return -EFAULT;
+ return 0;
+ }
+
+ ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat,
+ spot,
+ GFP_ATOMIC);
+ if (ret_val != 0)
+ return ret_val;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * calipso_pad_write - Writes pad bytes in TLV format
+ * @buf: the buffer
+ * @offset: offset from start of buffer to write padding
+ * @count: number of pad bytes to write
+ *
+ * Description:
+ * Write @count bytes of TLV padding into @buf starting at offset @offset.
+ * @count should be less than 8 - see RFC 4942.
+ *
+ */
+static int calipso_pad_write(unsigned char *buf, unsigned int offset,
+ unsigned int count)
+{
+ if (WARN_ON_ONCE(count >= 8))
+ return -EINVAL;
+
+ switch (count) {
+ case 0:
+ break;
+ case 1:
+ buf[offset] = IPV6_TLV_PAD1;
+ break;
+ default:
+ buf[offset] = IPV6_TLV_PADN;
+ buf[offset + 1] = count - 2;
+ if (count > 2)
+ memset(buf + offset + 2, 0, count - 2);
+ break;
+ }
+ return 0;
+}
+
+/**
+ * calipso_genopt - Generate a CALIPSO option
+ * @buf: the option buffer
+ * @start: offset from which to write
+ * @buf_len: the size of @buf
+ * @doi_def: the CALIPSO DOI to use
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Generate a CALIPSO option using the DOI definition and security attributes
+ * passed to the function. This also generates up to three bytes of leading
+ * padding that ensures that the option is 4n + 2 aligned. It returns the
+ * number of bytes written (including any initial padding).
+ */
+static int calipso_genopt(unsigned char *buf, u32 start, u32 buf_len,
+ const struct calipso_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val;
+ u32 len, pad;
+ u16 crc;
+ static const unsigned char padding[4] = {2, 1, 0, 3};
+ unsigned char *calipso;
+
+ /* CALIPSO has 4n + 2 alignment */
+ pad = padding[start & 3];
+ if (buf_len <= start + pad + CALIPSO_HDR_LEN)
+ return -ENOSPC;
+
+ if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0)
+ return -EPERM;
+
+ len = CALIPSO_HDR_LEN;
+
+ if (secattr->flags & NETLBL_SECATTR_MLS_CAT) {
+ ret_val = calipso_map_cat_hton(doi_def,
+ secattr,
+ buf + start + pad + len,
+ buf_len - start - pad - len);
+ if (ret_val < 0)
+ return ret_val;
+ len += ret_val;
+ }
+
+ calipso_pad_write(buf, start, pad);
+ calipso = buf + start + pad;
+
+ calipso[0] = IPV6_TLV_CALIPSO;
+ calipso[1] = len - 2;
+ put_unaligned_be32(doi_def->doi, calipso + 2);
+ calipso[6] = (len - CALIPSO_HDR_LEN) / 4;
+ calipso[7] = secattr->attr.mls.lvl;
+ crc = ~crc_ccitt(0xffff, calipso, len);
+ calipso[8] = crc & 0xff;
+ calipso[9] = (crc >> 8) & 0xff;
+ return pad + len;
+}
+
+/* Hop-by-hop hdr helper functions
+ */
+
+/**
+ * calipso_opt_update - Replaces socket's hop options with a new set
+ * @sk: the socket
+ * @hop: new hop options
+ *
+ * Description:
+ * Replaces @sk's hop options with @hop. @hop may be NULL to leave
+ * the socket with no hop options.
+ *
+ */
+static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop)
+{
+ struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts;
+
+ txopts = ipv6_renew_options_kern(sk, old, IPV6_HOPOPTS,
+ hop, hop ? ipv6_optlen(hop) : 0);
+ txopt_put(old);
+ if (IS_ERR(txopts))
+ return PTR_ERR(txopts);
+
+ txopts = ipv6_update_options(sk, txopts);
+ if (txopts) {
+ atomic_sub(txopts->tot_len, &sk->sk_omem_alloc);
+ txopt_put(txopts);
+ }
+
+ return 0;
+}
+
+/**
+ * calipso_tlv_len - Returns the length of the TLV
+ * @opt: the option header
+ * @offset: offset of the TLV within the header
+ *
+ * Description:
+ * Returns the length of the TLV option at offset @offset within
+ * the option header @opt. Checks that the entire TLV fits inside
+ * the option header, returns a negative value if this is not the case.
+ */
+static int calipso_tlv_len(struct ipv6_opt_hdr *opt, unsigned int offset)
+{
+ unsigned char *tlv = (unsigned char *)opt;
+ unsigned int opt_len = ipv6_optlen(opt), tlv_len;
+
+ if (offset < sizeof(*opt) || offset >= opt_len)
+ return -EINVAL;
+ if (tlv[offset] == IPV6_TLV_PAD1)
+ return 1;
+ if (offset + 1 >= opt_len)
+ return -EINVAL;
+ tlv_len = tlv[offset + 1] + 2;
+ if (offset + tlv_len > opt_len)
+ return -EINVAL;
+ return tlv_len;
+}
+
+/**
+ * calipso_opt_find - Finds the CALIPSO option in an IPv6 hop options header
+ * @hop: the hop options header
+ * @start: on return holds the offset of any leading padding
+ * @end: on return holds the offset of the first non-pad TLV after CALIPSO
+ *
+ * Description:
+ * Finds the space occupied by a CALIPSO option (including any leading and
+ * trailing padding).
+ *
+ * If a CALIPSO option exists set @start and @end to the
+ * offsets within @hop of the start of padding before the first
+ * CALIPSO option and the end of padding after the first CALIPSO
+ * option. In this case the function returns 0.
+ *
+ * In the absence of a CALIPSO option, @start and @end will be
+ * set to the start and end of any trailing padding in the header.
+ * This is useful when appending a new option, as the caller may want
+ * to overwrite some of this padding. In this case the function will
+ * return -ENOENT.
+ */
+static int calipso_opt_find(struct ipv6_opt_hdr *hop, unsigned int *start,
+ unsigned int *end)
+{
+ int ret_val = -ENOENT, tlv_len;
+ unsigned int opt_len, offset, offset_s = 0, offset_e = 0;
+ unsigned char *opt = (unsigned char *)hop;
+
+ opt_len = ipv6_optlen(hop);
+ offset = sizeof(*hop);
+
+ while (offset < opt_len) {
+ tlv_len = calipso_tlv_len(hop, offset);
+ if (tlv_len < 0)
+ return tlv_len;
+
+ switch (opt[offset]) {
+ case IPV6_TLV_PAD1:
+ case IPV6_TLV_PADN:
+ if (offset_e)
+ offset_e = offset;
+ break;
+ case IPV6_TLV_CALIPSO:
+ ret_val = 0;
+ offset_e = offset;
+ break;
+ default:
+ if (offset_e == 0)
+ offset_s = offset;
+ else
+ goto out;
+ }
+ offset += tlv_len;
+ }
+
+out:
+ if (offset_s)
+ *start = offset_s + calipso_tlv_len(hop, offset_s);
+ else
+ *start = sizeof(*hop);
+ if (offset_e)
+ *end = offset_e + calipso_tlv_len(hop, offset_e);
+ else
+ *end = opt_len;
+
+ return ret_val;
+}
+
+/**
+ * calipso_opt_insert - Inserts a CALIPSO option into an IPv6 hop opt hdr
+ * @hop: the original hop options header
+ * @doi_def: the CALIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Creates a new hop options header based on @hop with a
+ * CALIPSO option added to it. If @hop already contains a CALIPSO
+ * option this is overwritten, otherwise the new option is appended
+ * after any existing options. If @hop is NULL then the new header
+ * will contain just the CALIPSO option and any needed padding.
+ *
+ */
+static struct ipv6_opt_hdr *
+calipso_opt_insert(struct ipv6_opt_hdr *hop,
+ const struct calipso_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ unsigned int start, end, buf_len, pad, hop_len;
+ struct ipv6_opt_hdr *new;
+ int ret_val;
+
+ if (hop) {
+ hop_len = ipv6_optlen(hop);
+ ret_val = calipso_opt_find(hop, &start, &end);
+ if (ret_val && ret_val != -ENOENT)
+ return ERR_PTR(ret_val);
+ } else {
+ hop_len = 0;
+ start = sizeof(*hop);
+ end = 0;
+ }
+
+ buf_len = hop_len + start - end + CALIPSO_OPT_LEN_MAX_WITH_PAD;
+ new = kzalloc(buf_len, GFP_ATOMIC);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ if (start > sizeof(*hop))
+ memcpy(new, hop, start);
+ ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def,
+ secattr);
+ if (ret_val < 0) {
+ kfree(new);
+ return ERR_PTR(ret_val);
+ }
+
+ buf_len = start + ret_val;
+ /* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */
+ pad = ((buf_len & 4) + (end & 7)) & 7;
+ calipso_pad_write((unsigned char *)new, buf_len, pad);
+ buf_len += pad;
+
+ if (end != hop_len) {
+ memcpy((char *)new + buf_len, (char *)hop + end, hop_len - end);
+ buf_len += hop_len - end;
+ }
+ new->nexthdr = 0;
+ new->hdrlen = buf_len / 8 - 1;
+
+ return new;
+}
+
+/**
+ * calipso_opt_del - Removes the CALIPSO option from an option header
+ * @hop: the original header
+ * @new: the new header
+ *
+ * Description:
+ * Creates a new header based on @hop without any CALIPSO option. If @hop
+ * doesn't contain a CALIPSO option it returns -ENOENT. If @hop contains
+ * no other non-padding options, it returns zero with @new set to NULL.
+ * Otherwise it returns zero, creates a new header without the CALIPSO
+ * option (and removing as much padding as possible) and returns with
+ * @new set to that header. Any other non-zero result of calipso_opt_find()
+ * is returned unchanged, and -ENOMEM is returned if the new header cannot
+ * be allocated. A non-NULL @new is allocated with kzalloc(GFP_ATOMIC).
+ *
+ */
+static int calipso_opt_del(struct ipv6_opt_hdr *hop,
+ struct ipv6_opt_hdr **new)
+{
+ int ret_val;
+ unsigned int start, end, delta, pad, hop_len;
+
+ ret_val = calipso_opt_find(hop, &start, &end);
+ if (ret_val)
+ return ret_val;
+
+ hop_len = ipv6_optlen(hop);
+ if (start == sizeof(*hop) && end == hop_len) {
+ /* There's no other option in the header so return NULL */
+ *new = NULL;
+ return 0;
+ }
+
+ /* Only whole multiples of 8 bytes are dropped from the header */
+ delta = (end - start) & ~7;
+ *new = kzalloc(hop_len - delta, GFP_ATOMIC);
+ if (!*new)
+ return -ENOMEM;
+
+ /* Keep the bytes before the removed span, then shrink hdrlen to match */
+ memcpy(*new, hop, start);
+ (*new)->hdrlen -= delta / 8;
+ /* The leftover 0-7 bytes of the removed span are rewritten as padding */
+ pad = (end - start) & 7;
+ calipso_pad_write((unsigned char *)*new, start, pad);
+ /* Append whatever followed the removed span in the old header */
+ if (end != hop_len)
+ memcpy((char *)*new + start + pad, (char *)hop + end,
+ hop_len - end);
+
+ return 0;
+}
+
+/**
+ * calipso_opt_getattr - Get the security attributes from a memory block
+ * @calipso: the CALIPSO option
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Inspect @calipso and return the security attributes in @secattr. If
+ * calipso_cache_check() returns zero the function returns zero at once.
+ * Otherwise the DOI is read from byte offset 2 and looked up, the level is
+ * taken from byte 7 and, when byte 6 is non-zero, the categories are mapped
+ * from byte offset 10. Returns zero on success and negative values on
+ * failure: -EINVAL if the category length plus 8 exceeds the length in
+ * byte 1, -ENOMSG if the DOI is not found, or the error returned by
+ * calipso_map_cat_ntoh().
+ *
+ */
+static int calipso_opt_getattr(const unsigned char *calipso,
+ struct netlbl_lsm_secattr *secattr)
+{
+ int ret_val = -ENOMSG;
+ /* Byte 1 is used as the option length; byte 6 is scaled by 4 to bytes */
+ u32 doi, len = calipso[1], cat_len = calipso[6] * 4;
+ struct calipso_doi *doi_def;
+
+ /* Reject options whose categories cannot fit in the stated length */
+ if (cat_len + 8 > len)
+ return -EINVAL;
+
+ if (calipso_cache_check(calipso + 2, calipso[1], secattr) == 0)
+ return 0;
+
+ doi = get_unaligned_be32(calipso + 2);
+ /* The DOI lookup and the use of doi_def stay inside the RCU section */
+ rcu_read_lock();
+ doi_def = calipso_doi_search(doi);
+ if (!doi_def)
+ goto getattr_return;
+
+ secattr->attr.mls.lvl = calipso[7];
+ secattr->flags |= NETLBL_SECATTR_MLS_LVL;
+
+ if (cat_len) {
+ ret_val = calipso_map_cat_ntoh(doi_def,
+ calipso + 10,
+ cat_len,
+ secattr);
+ if (ret_val != 0) {
+ netlbl_catmap_free(secattr->attr.mls.cat);
+ goto getattr_return;
+ }
+
+ secattr->flags |= NETLBL_SECATTR_MLS_CAT;
+ }
+
+ /* NOTE(review): when cat_len is zero ret_val is still -ENOMSG here even
+ * though @secattr has been filled in - confirm this is intended.
+ */
+ secattr->type = NETLBL_NLTYPE_CALIPSO;
+
+getattr_return:
+ rcu_read_unlock();
+ return ret_val;
+}
+
+/* sock functions.
+ */
+
+/**
+ * calipso_sock_getattr - Get the security attributes from a sock
+ * @sk: the sock
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Walk the TLVs of the hop-by-hop header attached to @sk, if there is one,
+ * and pass the first CALIPSO option found to calipso_opt_getattr() so that
+ * its security attributes are returned in @secattr. This function requires
+ * that @sk be locked, or privately held, but it does not do any locking
+ * itself. Returns zero on success and negative values on failure; -ENOMSG
+ * is returned when no CALIPSO option is present.
+ *
+ */
+static int calipso_sock_getattr(struct sock *sk,
+				struct netlbl_lsm_secattr *secattr)
+{
+	struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+	struct ipv6_opt_hdr *hop;
+	unsigned char *opt;
+	int opt_len, offset, len;
+	int ret_val = -ENOMSG;
+
+	if (!txopts || !txopts->hopopt)
+		goto done;
+
+	hop = txopts->hopopt;
+	opt = (unsigned char *)hop;
+	opt_len = ipv6_optlen(hop);
+
+	/* Step over one TLV per iteration until a CALIPSO option shows up */
+	for (offset = sizeof(*hop); offset < opt_len; offset += len) {
+		len = calipso_tlv_len(hop, offset);
+		if (len < 0) {
+			ret_val = len;
+			break;
+		}
+
+		if (opt[offset] != IPV6_TLV_CALIPSO)
+			continue;
+
+		/* Only the first CALIPSO option is ever examined */
+		if (len < CALIPSO_HDR_LEN)
+			ret_val = -EINVAL;
+		else
+			ret_val = calipso_opt_getattr(&opt[offset], secattr);
+		break;
+	}
+
+done:
+	txopt_put(txopts);
+	return ret_val;
+}
+
+/**
+ * calipso_sock_setattr - Add a CALIPSO option to a socket
+ * @sk: the socket
+ * @doi_def: the CALIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Build a hop-by-hop header carrying a CALIPSO option for @doi_def and
+ * @secattr, based on the socket's current hop-by-hop header when it has
+ * one, and install it with calipso_opt_update(). This function requires
+ * exclusive access to @sk, which means it either needs to be in the
+ * process of being created or locked. Returns zero on success and negative
+ * values on failure.
+ *
+ */
+static int calipso_sock_setattr(struct sock *sk,
+				const struct calipso_doi *doi_def,
+				const struct netlbl_lsm_secattr *secattr)
+{
+	struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+	struct ipv6_opt_hdr *old = txopts ? txopts->hopopt : NULL;
+	struct ipv6_opt_hdr *new;
+	int ret_val;
+
+	new = calipso_opt_insert(old, doi_def, secattr);
+	/* The reference on the old options is dropped before the error check */
+	txopt_put(txopts);
+	if (IS_ERR(new))
+		return PTR_ERR(new);
+
+	ret_val = calipso_opt_update(sk, new);
+	kfree(new);
+
+	return ret_val;
+}
+
+/**
+ * calipso_sock_delattr - Delete the CALIPSO option from a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Removes the CALIPSO option from a socket, if present. Nothing is changed
+ * when the socket has no hop-by-hop header or when calipso_opt_del()
+ * reports a non-zero result.
+ *
+ */
+static void calipso_sock_delattr(struct sock *sk)
+{
+	struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+	struct ipv6_opt_hdr *new_hop;
+
+	if (txopts && txopts->hopopt &&
+	    calipso_opt_del(txopts->hopopt, &new_hop) == 0) {
+		calipso_opt_update(sk, new_hop);
+		kfree(new_hop);
+	}
+
+	txopt_put(txopts);
+}
+
+/* request sock functions.
+ */
+
+/**
+ * calipso_req_setattr - Add a CALIPSO option to a connection request socket
+ * @req: the connection request socket
+ * @doi_def: the CALIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CALIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function. Returns zero on success and
+ * negative values on failure.
+ *
+ */
+static int calipso_req_setattr(struct request_sock *req,
+ const struct calipso_doi *doi_def,
+ const struct netlbl_lsm_secattr *secattr)
+{
+ struct ipv6_txoptions *txopts;
+ struct inet_request_sock *req_inet = inet_rsk(req);
+ struct ipv6_opt_hdr *old, *new;
+ struct sock *sk = sk_to_full_sk(req_to_sk(req));
+
+ /* Start from the request's existing hop-by-hop header, if it has one */
+ if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt)
+ old = req_inet->ipv6_opt->hopopt;
+ else
+ old = NULL;
+
+ new = calipso_opt_insert(old, doi_def, secattr);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+
+ /* Build a replacement option set with @new as its IPV6_HOPOPTS header */
+ txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
+ new, new ? ipv6_optlen(new) : 0);
+
+ /* The temporary header is freed whether or not the renewal succeeded */
+ kfree(new);
+
+ if (IS_ERR(txopts))
+ return PTR_ERR(txopts);
+
+ /* Install the new set; release the one it replaced, if there was one */
+ txopts = xchg(&req_inet->ipv6_opt, txopts);
+ if (txopts) {
+ atomic_sub(txopts->tot_len, &sk->sk_omem_alloc);
+ txopt_put(txopts);
+ }
+
+ return 0;
+}
+
+/**
+ * calipso_req_delattr - Delete the CALIPSO option from a request socket
+ * @req: the request socket
+ *
+ * Description:
+ * Removes the CALIPSO option from a request socket, if present. The
+ * request's options are left untouched when it has no hop-by-hop header,
+ * when calipso_opt_del() returns non-zero, or when the replacement option
+ * set cannot be built.
+ *
+ */
+static void calipso_req_delattr(struct request_sock *req)
+{
+ struct inet_request_sock *req_inet = inet_rsk(req);
+ struct ipv6_opt_hdr *new;
+ struct ipv6_txoptions *txopts;
+ struct sock *sk = sk_to_full_sk(req_to_sk(req));
+
+ if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt)
+ return;
+
+ if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new))
+ return; /* Nothing to do */
+
+ /* @new may be NULL here, in which case a zero length is passed */
+ txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
+ new, new ? ipv6_optlen(new) : 0);
+
+ if (!IS_ERR(txopts)) {
+ /* Install the new set; release the one it replaced, if any */
+ txopts = xchg(&req_inet->ipv6_opt, txopts);
+ if (txopts) {
+ atomic_sub(txopts->tot_len, &sk->sk_omem_alloc);
+ txopt_put(txopts);
+ }
+ }
+ kfree(new);
+}
+
+/* skbuff functions.
+ */
+
+/**
+ * calipso_skbuff_optptr - Find the CALIPSO option in the packet
+ * @skb: the packet
+ *
+ * Description:
+ * Look for a CALIPSO TLV in the packet's hop-by-hop options header. Returns
+ * a pointer to the start of the CALIPSO option on success, or NULL if the
+ * IPv6 header is not followed by a hop-by-hop header or no CALIPSO option
+ * is found.
+ *
+ */
+static unsigned char *calipso_skbuff_optptr(const struct sk_buff *skb)
+{
+	const struct ipv6hdr *ip6_hdr = ipv6_hdr(skb);
+	int offset;
+
+	if (ip6_hdr->nexthdr != NEXTHDR_HOP)
+		return NULL;
+
+	/* A negative result means the TLV was not found */
+	offset = ipv6_find_tlv(skb, sizeof(*ip6_hdr), IPV6_TLV_CALIPSO);
+
+	return offset < 0 ? NULL : (unsigned char *)ip6_hdr + offset;
+}
+
+/**
+ * calipso_skbuff_setattr - Set the CALIPSO option on a packet
+ * @skb: the packet
+ * @doi_def: the CALIPSO DOI to use
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Set the CALIPSO option on the given packet based on the security attributes.
+ * If the packet already has a hop-by-hop header the option is written into
+ * it at the position reported by calipso_opt_find(); otherwise a new
+ * hop-by-hop header is inserted after the IPv6 header. Returns zero on
+ * success and negative values on failure.
+ *
+ */
+static int calipso_skbuff_setattr(struct sk_buff *skb,
+				  const struct calipso_doi *doi_def,
+				  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct ipv6hdr *ip6_hdr;
+	struct ipv6_opt_hdr *hop;
+	unsigned char buf[CALIPSO_MAX_BUFFER];
+	int len_delta, new_end, pad;
+	unsigned int start, end;
+
+	ip6_hdr = ipv6_hdr(skb);
+	if (ip6_hdr->nexthdr == NEXTHDR_HOP) {
+		hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1);
+		ret_val = calipso_opt_find(hop, &start, &end);
+		if (ret_val && ret_val != -ENOENT)
+			return ret_val;
+	} else {
+		/* No hop-by-hop header yet: one is created from offset 0 */
+		start = 0;
+		end = 0;
+	}
+
+	/* Generate the option in a scratch buffer at the same 4-byte phase */
+	memset(buf, 0, sizeof(buf));
+	ret_val = calipso_genopt(buf, start & 3, sizeof(buf), doi_def, secattr);
+	if (ret_val < 0)
+		return ret_val;
+
+	new_end = start + ret_val;
+	/* At this point new_end aligns to 4n, so (new_end & 4) pads to 8n */
+	pad = ((new_end & 4) + (end & 7)) & 7;
+	len_delta = new_end - (int)end + pad;
+	ret_val = skb_cow(skb, skb_headroom(skb) + len_delta);
+	if (ret_val < 0)
+		return ret_val;
+
+	/* skb_cow() may have reallocated the header, so the pointer taken
+	 * above can be stale; fetch it again before touching the packet.
+	 */
+	ip6_hdr = ipv6_hdr(skb);
+
+	if (len_delta) {
+		/* Grow or shrink the front of the packet, then slide the IPv6
+		 * header and the options before @start to the new position.
+		 */
+		if (len_delta > 0)
+			skb_push(skb, len_delta);
+		else
+			skb_pull(skb, -len_delta);
+		memmove((char *)ip6_hdr - len_delta, ip6_hdr,
+			sizeof(*ip6_hdr) + start);
+		skb_reset_network_header(skb);
+		ip6_hdr = ipv6_hdr(skb);
+	}
+
+	hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1);
+	if (start == 0) {
+		/* Fresh header: its first two bytes are filled in via buf and
+		 * copied into place by the memcpy() below.
+		 */
+		struct ipv6_opt_hdr *new_hop = (struct ipv6_opt_hdr *)buf;
+
+		new_hop->nexthdr = ip6_hdr->nexthdr;
+		new_hop->hdrlen = len_delta / 8 - 1;
+		ip6_hdr->nexthdr = NEXTHDR_HOP;
+	} else {
+		hop->hdrlen += len_delta / 8;
+	}
+	memcpy((char *)hop + start, buf + (start & 3), new_end - start);
+	calipso_pad_write((unsigned char *)hop, new_end, pad);
+
+	return 0;
+}
+
+/**
+ * calipso_skbuff_delattr - Delete any CALIPSO options from a packet
+ * @skb: the packet
+ *
+ * Description:
+ * Removes any and all CALIPSO options from the given packet. Returns zero on
+ * success, negative values on failure. Zero is also returned, without
+ * touching the packet, when calipso_skbuff_optptr() finds no option.
+ *
+ */
+static int calipso_skbuff_delattr(struct sk_buff *skb)
+{
+ int ret_val;
+ struct ipv6hdr *ip6_hdr;
+ struct ipv6_opt_hdr *old_hop;
+ u32 old_hop_len, start = 0, end = 0, delta, size, pad;
+
+ if (!calipso_skbuff_optptr(skb))
+ return 0;
+
+ /* since we are changing the packet we should make a copy */
+ ret_val = skb_cow(skb, skb_headroom(skb));
+ if (ret_val < 0)
+ return ret_val;
+
+ /* Header pointers are taken only after skb_cow() has run */
+ ip6_hdr = ipv6_hdr(skb);
+ old_hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1);
+ old_hop_len = ipv6_optlen(old_hop);
+
+ ret_val = calipso_opt_find(old_hop, &start, &end);
+ if (ret_val)
+ return ret_val;
+
+ if (start == sizeof(*old_hop) && end == old_hop_len) {
+ /* There's no other option in the header so we delete
+ * the whole thing. */
+ delta = old_hop_len;
+ size = sizeof(*ip6_hdr);
+ ip6_hdr->nexthdr = old_hop->nexthdr;
+ } else {
+ /* Drop whole 8-byte units; the remainder is rewritten as padding */
+ delta = (end - start) & ~7;
+ if (delta)
+ old_hop->hdrlen -= delta / 8;
+ pad = (end - start) & 7;
+ size = sizeof(*ip6_hdr) + start + pad;
+ calipso_pad_write((unsigned char *)old_hop, start, pad);
+ }
+
+ if (delta) {
+ /* Trim the front of the packet and slide the kept bytes forward */
+ skb_pull(skb, delta);
+ memmove((char *)ip6_hdr + delta, ip6_hdr, size);
+ skb_reset_network_header(skb);
+ }
+
+ return 0;
+}
+
+/* Table of CALIPSO callbacks; calipso_init() passes its address to
+ * netlbl_calipso_ops_register().
+ */
+static const struct netlbl_calipso_ops ops = {
+ .doi_add = calipso_doi_add,
+ .doi_free = calipso_doi_free,
+ .doi_remove = calipso_doi_remove,
+ .doi_getdef = calipso_doi_getdef,
+ .doi_putdef = calipso_doi_putdef,
+ .doi_walk = calipso_doi_walk,
+ .sock_getattr = calipso_sock_getattr,
+ .sock_setattr = calipso_sock_setattr,
+ .sock_delattr = calipso_sock_delattr,
+ .req_setattr = calipso_req_setattr,
+ .req_delattr = calipso_req_delattr,
+ .opt_getattr = calipso_opt_getattr,
+ .skbuff_optptr = calipso_skbuff_optptr,
+ .skbuff_setattr = calipso_skbuff_setattr,
+ .skbuff_delattr = calipso_skbuff_delattr,
+ .cache_invalidate = calipso_cache_invalidate,
+ .cache_add = calipso_cache_add
+};
+
+/**
+ * calipso_init - Initialize the CALIPSO module
+ *
+ * Description:
+ * Set up the CALIPSO cache and, only if that succeeds, register the CALIPSO
+ * operations with NetLabel. Returns zero on success and the negative value
+ * from calipso_cache_init() on failure.
+ *
+ */
+int __init calipso_init(void)
+{
+	int ret_val = calipso_cache_init();
+
+	if (ret_val)
+		return ret_val;
+
+	netlbl_calipso_ops_register(&ops);
+	return 0;
+}
+
+/**
+ * calipso_exit - Tear down the CALIPSO module
+ *
+ * Description:
+ * Registers a NULL operations table with NetLabel, invalidates the CALIPSO
+ * cache and frees calipso_cache.
+ *
+ */
+void calipso_exit(void)
+{
+ /* Drop the ops first, before the cache is emptied and freed */
+ netlbl_calipso_ops_register(NULL);
+ calipso_cache_invalidate();
+ kfree(calipso_cache);
+}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9dd3882fe6bf..37874e2f30ed 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -450,9 +450,10 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
copied = len;
}
err = skb_copy_datagram_msg(skb, 0, msg, copied);
- if (err)
- goto out_free_skb;
-
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ return err;
+ }
sock_recv_timestamp(msg, sk, skb);
serr = SKB_EXT_ERR(skb);
@@ -509,8 +510,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
msg->msg_flags |= MSG_ERRQUEUE;
err = copied;
-out_free_skb:
- kfree_skb(skb);
+ consume_skb(skb);
out:
return err;
}
@@ -727,13 +727,13 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
struct msghdr *msg, struct flowi6 *fl6,
- struct ipv6_txoptions *opt,
- int *hlimit, int *tclass, int *dontfrag)
+ struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc)
{
struct in6_pktinfo *src_info;
struct cmsghdr *cmsg;
struct ipv6_rt_hdr *rthdr;
struct ipv6_opt_hdr *hdr;
+ struct ipv6_txoptions *opt = ipc6->opt;
int len;
int err = 0;
@@ -745,6 +745,13 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
}
+ if (cmsg->cmsg_level == SOL_SOCKET) {
+ err = __sock_cmsg_send(sk, msg, cmsg, sockc);
+ if (err)
+ return err;
+ continue;
+ }
+
if (cmsg->cmsg_level != SOL_IPV6)
continue;
@@ -946,8 +953,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
}
- *hlimit = *(int *)CMSG_DATA(cmsg);
- if (*hlimit < -1 || *hlimit > 0xff) {
+ ipc6->hlimit = *(int *)CMSG_DATA(cmsg);
+ if (ipc6->hlimit < -1 || ipc6->hlimit > 0xff) {
err = -EINVAL;
goto exit_f;
}
@@ -967,7 +974,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
err = 0;
- *tclass = tc;
+ ipc6->tclass = tc;
break;
}
@@ -985,7 +992,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
err = 0;
- *dontfrag = df;
+ ipc6->dontfrag = df;
break;
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ea7c4d64a00a..139ceb68bd37 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -43,6 +43,7 @@
#include <net/ndisc.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <net/calipso.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/xfrm.h>
#endif
@@ -258,8 +259,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -280,8 +281,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
return 1;
}
- IP6_INC_STATS_BH(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
return -1;
}
@@ -309,8 +310,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -319,8 +320,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
skb->pkt_type != PACKET_HOST) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -334,8 +335,8 @@ looped_back:
* processed by own
*/
if (!addr) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -360,8 +361,8 @@ looped_back:
goto unknown_rh;
/* Silently discard invalid RTH type 2 */
if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -379,8 +380,8 @@ looped_back:
n = hdr->hdrlen >> 1;
if (hdr->segments_left > n) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
((&hdr->segments_left) -
skb_network_header(skb)));
@@ -393,8 +394,8 @@ looped_back:
if (skb_cloned(skb)) {
/* the copy is a forwarded packet */
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return -1;
}
@@ -416,14 +417,14 @@ looped_back:
if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
(xfrm_address_t *)&ipv6_hdr(skb)->saddr,
IPPROTO_ROUTING) < 0) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -434,8 +435,8 @@ looped_back:
}
if (ipv6_addr_is_multicast(addr)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -454,8 +455,8 @@ looped_back:
if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
if (ipv6_hdr(skb)->hop_limit <= 1) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
0);
kfree_skb(skb);
@@ -470,7 +471,7 @@ looped_back:
return -1;
unknown_rh:
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
(&hdr->type) - skb_network_header(skb));
return -1;
@@ -568,28 +569,28 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
nh[optoff+1]);
- IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
goto drop;
}
pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
if (pkt_len <= IPV6_MAXPLEN) {
- IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
return false;
}
if (ipv6_hdr(skb)->payload_len) {
- IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
return false;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
- IP6_INC_STATS_BH(net, ipv6_skb_idev(skb),
- IPSTATS_MIB_INTRUNCATEDPKTS);
+ __IP6_INC_STATS(net, ipv6_skb_idev(skb),
+ IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
@@ -603,6 +604,28 @@ drop:
return false;
}
+/* CALIPSO RFC 5570 */
+
+/* Hop-by-hop handler for the CALIPSO TLV found at @optoff in @skb.
+ *
+ * The option is accepted when its length byte is at least 8, the category
+ * length (byte 6, in units of 4 bytes) plus 8 fits in that length, and
+ * calipso_validate() approves it. Returns true to keep processing the
+ * packet; otherwise frees @skb and returns false.
+ */
+static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff)
+{
+	const unsigned char *opt = skb_network_header(skb) + optoff;
+	bool ok;
+
+	/* Short-circuit order matters: byte 6 is read only if the length is ok */
+	ok = opt[1] >= 8 &&
+	     opt[6] * 4 + 8 <= opt[1] &&
+	     calipso_validate(skb, opt);
+	if (!ok)
+		kfree_skb(skb);
+
+	return ok;
+}
+
static const struct tlvtype_proc tlvprochopopt_lst[] = {
{
.type = IPV6_TLV_ROUTERALERT,
@@ -612,6 +635,10 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = {
.type = IPV6_TLV_JUMBO,
.func = ipv6_hop_jumbo,
},
+ {
+ .type = IPV6_TLV_CALIPSO,
+ .func = ipv6_hop_calipso,
+ },
{ -1, }
};
@@ -758,6 +785,27 @@ static int ipv6_renew_option(void *ohdr,
return 0;
}
+/**
+ * ipv6_renew_options - replace a specific ext hdr with a new one.
+ *
+ * @sk: sock from which to allocate memory
+ * @opt: original options
+ * @newtype: option type to replace in @opt
+ * @newopt: new option of type @newtype to replace (user-mem)
+ * @newoptlen: length of @newopt
+ *
+ * Returns a new set of options which is a copy of @opt with the
+ * option type @newtype replaced with @newopt.
+ *
+ * @opt may be NULL, in which case a new set of options is returned
+ * containing just @newopt.
+ *
+ * @newopt may be NULL, in which case the specified option type is
+ * not copied into the new set of options.
+ *
+ * The new set of options is allocated from the socket option memory
+ * buffer of @sk.
+ */
struct ipv6_txoptions *
ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
int newtype,
@@ -830,6 +878,34 @@ out:
return ERR_PTR(err);
}
+/**
+ * ipv6_renew_options_kern - replace a specific ext hdr with a new one.
+ *
+ * @sk: sock from which to allocate memory
+ * @opt: original options
+ * @newtype: option type to replace in @opt
+ * @newopt: new option of type @newtype to replace (kernel-mem)
+ * @newoptlen: length of @newopt
+ *
+ * See ipv6_renew_options(). The difference is that @newopt is
+ * kernel memory, rather than user memory.
+ *
+ * Returns whatever ipv6_renew_options() returns.
+ */
+struct ipv6_txoptions *
+ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt,
+ int newtype, struct ipv6_opt_hdr *newopt,
+ int newoptlen)
+{
+ struct ipv6_txoptions *ret_val;
+ const mm_segment_t old_fs = get_fs();
+
+ /* Switch to KERNEL_DS for the call so the kernel pointer can be passed
+ * through the __user parameter; the saved value is restored afterwards.
+ */
+ set_fs(KERNEL_DS);
+ ret_val = ipv6_renew_options(sk, opt, newtype,
+ (struct ipv6_opt_hdr __user *)newopt,
+ newoptlen);
+ set_fs(old_fs);
+ return ret_val;
+}
+
struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
struct ipv6_txoptions *opt)
{
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 9508a20fbf61..305e2ed730bf 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -112,7 +112,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
}
EXPORT_SYMBOL(ipv6_skip_exthdr);
-int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
+int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type)
{
const unsigned char *nh = skb_network_header(skb);
int packet_len = skb_tail_pointer(skb) - skb_network_header(skb);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ed33abf57abd..5857c1fc8b67 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -67,6 +67,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
int err = 0;
+ u32 tb_id;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -86,7 +87,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto discard_pkt;
}
- table = fib6_get_table(net, rule->table);
+ tb_id = fib_rule_get_table(rule, arg);
+ table = fib6_get_table(net, tb_id);
if (!table) {
err = -EAGAIN;
goto out;
@@ -199,7 +201,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
struct fib6_rule *rule6 = (struct fib6_rule *) rule;
- if (rule->action == FR_ACT_TO_TBL) {
+ if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC)
goto errout;
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
new file mode 100644
index 000000000000..9ea249b9451e
--- /dev/null
+++ b/net/ipv6/fou6.c
@@ -0,0 +1,140 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <net/fou.h>
+#include <net/ip.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#include <net/protocol.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+
+/* Prepend a UDP header to @skb and fill it in.
+ *
+ * The destination port comes from @e, the source port from @sport, and the
+ * length is the packet length after the header has been pushed. The
+ * checksum is set up by udp6_set_csum() from the addresses in @fl6, and
+ * *@protocol is set to IPPROTO_UDP.
+ */
+static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ struct flowi6 *fl6, u8 *protocol, __be16 sport)
+{
+ struct udphdr *uh;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ /* skb->len already includes the UDP header pushed above */
+ uh->len = htons(skb->len);
+ /* First argument is true when TUNNEL_ENCAP_FLAG_CSUM6 is not set */
+ udp6_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM6), skb,
+ &fl6->saddr, &fl6->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+}
+
+/* Build the FOU encapsulation for @skb followed by its UDP header.
+ *
+ * The GSO type passed to __fou_build_header() depends on whether
+ * TUNNEL_ENCAP_FLAG_CSUM6 is set in @e. Returns zero on success or the
+ * error from __fou_build_header().
+ */
+int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+		      u8 *protocol, struct flowi6 *fl6)
+{
+	__be16 sport;
+	int gso_type;
+	int err;
+
+	if (e->flags & TUNNEL_ENCAP_FLAG_CSUM6)
+		gso_type = SKB_GSO_UDP_TUNNEL_CSUM;
+	else
+		gso_type = SKB_GSO_UDP_TUNNEL;
+
+	err = __fou_build_header(skb, e, protocol, &sport, gso_type);
+	if (err)
+		return err;
+
+	fou6_build_udp(skb, e, fl6, protocol, sport);
+	return 0;
+}
+EXPORT_SYMBOL(fou6_build_header);
+
+/* Build the GUE encapsulation for @skb followed by its UDP header.
+ *
+ * The GSO type passed to __gue_build_header() depends on whether
+ * TUNNEL_ENCAP_FLAG_CSUM6 is set in @e. Returns zero on success or the
+ * error from __gue_build_header().
+ */
+int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+		      u8 *protocol, struct flowi6 *fl6)
+{
+	__be16 sport;
+	int gso_type;
+	int err;
+
+	if (e->flags & TUNNEL_ENCAP_FLAG_CSUM6)
+		gso_type = SKB_GSO_UDP_TUNNEL_CSUM;
+	else
+		gso_type = SKB_GSO_UDP_TUNNEL;
+
+	err = __gue_build_header(skb, e, protocol, &sport, gso_type);
+	if (err)
+		return err;
+
+	fou6_build_udp(skb, e, fl6, protocol, sport);
+	return 0;
+}
+EXPORT_SYMBOL(gue6_build_header);
+
+/* With CONFIG_IPV6_FOU_TUNNEL enabled the FOU and GUE encapsulation ops are
+ * registered with the ip6 tunnel code; otherwise the add/del helpers below
+ * are empty stubs.
+ */
+#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL)
+
+static const struct ip6_tnl_encap_ops fou_ip6tun_ops = {
+ .encap_hlen = fou_encap_hlen,
+ .build_header = fou6_build_header,
+};
+
+static const struct ip6_tnl_encap_ops gue_ip6tun_ops = {
+ .encap_hlen = gue_encap_hlen,
+ .build_header = gue6_build_header,
+};
+
+/* Register the FOU ops, then the GUE ops. If the GUE registration fails the
+ * FOU ops are removed again. Returns zero on success or the negative value
+ * from ip6_tnl_encap_add_ops().
+ */
+static int ip6_tnl_encap_add_fou_ops(void)
+{
+ int ret;
+
+ ret = ip6_tnl_encap_add_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU);
+ if (ret < 0) {
+ pr_err("can't add fou6 ops\n");
+ return ret;
+ }
+
+ ret = ip6_tnl_encap_add_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE);
+ if (ret < 0) {
+ pr_err("can't add gue6 ops\n");
+ /* Undo the FOU registration so nothing is left half-registered */
+ ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU);
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Remove both the FOU and the GUE ops. */
+static void ip6_tnl_encap_del_fou_ops(void)
+{
+ ip6_tnl_encap_del_ops(&fou_ip6tun_ops, TUNNEL_ENCAP_FOU);
+ ip6_tnl_encap_del_ops(&gue_ip6tun_ops, TUNNEL_ENCAP_GUE);
+}
+
+#else
+
+/* Stub: nothing to register, always returns zero. */
+static int ip6_tnl_encap_add_fou_ops(void)
+{
+ return 0;
+}
+
+/* Stub: nothing to remove. */
+static void ip6_tnl_encap_del_fou_ops(void)
+{
+}
+
+#endif
+
+/* Module init: register the encapsulation ops and return the result. */
+static int __init fou6_init(void)
+{
+	return ip6_tnl_encap_add_fou_ops();
+}
+
+/* Module exit: remove the encapsulation ops added by fou6_init(). */
+static void __exit fou6_fini(void)
+{
+ ip6_tnl_encap_del_fou_ops();
+}
+
+module_init(fou6_init);
+module_exit(fou6_fini);
+MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 0013cacf7164..bd59c343d35f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!(type & ICMPV6_INFOMSG_MASK))
if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
- ping_err(skb, offset, info);
+ ping_err(skb, offset, ntohl(info));
}
static int icmpv6_rcv(struct sk_buff *skb);
@@ -388,7 +388,8 @@ relookup_failed:
/*
* Send an ICMP message in response to a packet in error
*/
-static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ const struct in6_addr *force_saddr)
{
struct net *net = dev_net(skb->dev);
struct inet6_dev *idev = NULL;
@@ -400,10 +401,11 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
struct icmpv6_msg msg;
+ struct sockcm_cookie sockc_unused = {0};
+ struct ipcm6_cookie ipc6;
int iif = 0;
int addr_type = 0;
int len;
- int hlimit;
int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
@@ -474,6 +476,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.daddr = hdr->saddr;
+ if (force_saddr)
+ saddr = force_saddr;
if (saddr)
fl6.saddr = *saddr;
fl6.flowi6_mark = mark;
@@ -501,11 +505,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ ipc6.tclass = np->tclass;
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
goto out;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
msg.skb = skb;
msg.offset = skb_network_offset(skb);
@@ -524,9 +533,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
err = ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), hlimit,
- np->tclass, NULL, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, np->dontfrag);
+ sizeof(struct icmp6hdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ MSG_DONTWAIT, &sockc_unused);
if (err) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -545,10 +554,75 @@ out:
*/
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
- icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
+ icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
kfree_skb(skb);
}
+/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
+ * if sufficient data bytes are available. When @type is ICMP_TIME_EXCEEDED
+ * an ICMPV6_TIME_EXCEED/ICMPV6_EXC_HOPLIMIT message is generated instead.
+ * @nhs is the size of the tunnel header(s) :
+ * Either an IPv4 header for SIT encap
+ * an IPv4 header + GRE header for GRE encap
+ * @data_len is used for the RFC 4884 handling below; it is treated as zero
+ * unless it is at least 128, a multiple of 8 and no larger than skb->len.
+ *
+ * Returns 1 if the packet is too short or no copy of it could be made,
+ * and 0 once the ICMPv6 message has been handed to icmp6_send().
+ */
+int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
+ unsigned int data_len)
+{
+ struct in6_addr temp_saddr;
+ struct rt6_info *rt;
+ struct sk_buff *skb2;
+ u32 info = 0;
+
+ /* Need the tunnel header(s), an IPv6 header and 8 more bytes */
+ if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
+ return 1;
+
+ /* RFC 4884 (partial) support for ICMP extensions */
+ if (data_len < 128 || (data_len & 7) || skb->len < data_len)
+ data_len = 0;
+
+ /* The data_len path below rewrites skb2's data, so it takes a full copy */
+ skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
+
+ if (!skb2)
+ return 1;
+
+ /* Strip the tunnel header(s) so skb2 starts at the inner IPv6 header */
+ skb_dst_drop(skb2);
+ skb_pull(skb2, nhs);
+ skb_reset_network_header(skb2);
+
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+
+ if (rt && rt->dst.dev)
+ skb2->dev = rt->dst.dev;
+
+ /* The forced source address is the outer IPv4 source, v4-mapped */
+ ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
+
+ if (data_len) {
+ /* RFC 4884 (partial) support :
+ * insert 0 padding at the end, before the extensions
+ */
+ __skb_push(skb2, nhs);
+ skb_reset_network_header(skb2);
+ memmove(skb2->data, skb2->data + nhs, data_len - nhs);
+ memset(skb2->data + data_len - nhs, 0, nhs);
+ /* RFC 4884 4.5 : Length is measured in 64-bit words,
+ * and stored in reserved[0]
+ */
+ info = (data_len/8) << 24;
+ }
+ if (type == ICMP_TIME_EXCEEDED)
+ icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
+ info, &temp_saddr);
+ else
+ icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
+ info, &temp_saddr);
+ if (rt)
+ ip6_rt_put(rt);
+
+ kfree_skb(skb2);
+
+ return 0;
+}
+EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
+
static void icmpv6_echo_reply(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
@@ -561,10 +635,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct flowi6 fl6;
struct icmpv6_msg msg;
struct dst_entry *dst;
+ struct ipcm6_cookie ipc6;
int err = 0;
- int hlimit;
- u8 tclass;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ struct sockcm_cookie sockc_unused = {0};
saddr = &ipv6_hdr(skb)->daddr;
@@ -581,7 +655,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
+ fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -604,22 +678,24 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (IS_ERR(dst))
goto out;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-
idev = __in6_dev_get(skb->dev);
msg.skb = skb;
msg.offset = 0;
msg.type = ICMPV6_ECHO_REPLY;
- tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
+
err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
+ sizeof(struct icmp6hdr), &ipc6, &fl6,
(struct rt6_info *)dst, MSG_DONTWAIT,
- np->dontfrag);
+ &sockc_unused);
if (err) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -671,7 +747,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
return;
out:
- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
}
/*
@@ -707,7 +783,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+ __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
@@ -725,7 +801,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
type = hdr->icmp6_type;
- ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
+ ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
switch (type) {
case ICMPV6_ECHO_REQUEST:
@@ -809,9 +885,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
return 0;
csum_error:
- ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
+ __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
- ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
kfree_skb(skb);
return 0;
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index 28542cb2b387..e0170f62bc39 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -23,10 +23,76 @@
#include <net/protocol.h>
#include <uapi/linux/ila.h>
+struct ila_locator { /* 8-byte locator; all union members alias the same bytes */
+ union {
+ __u8 v8[8]; /* viewed as eight bytes */
+ __be16 v16[4]; /* viewed as four big-endian 16-bit words */
+ __be32 v32[2]; /* viewed as two big-endian 32-bit words */
+ __be64 v64; /* viewed as one big-endian 64-bit value */
+ };
+};
+
+struct ila_identifier { /* 8-byte identifier; its first byte carries the type and the C-bit */
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 __space:4;
+ u8 csum_neutral:1; /* checksum-neutral flag, the "C-bit" in later comments */
+ u8 type:3; /* identifier type, compared against ILA_ATYPE_* */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 type:3; /* same three fields, declared in the opposite order */
+ u8 csum_neutral:1;
+ u8 __space:4;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+ u8 __space2[7]; /* remaining seven bytes of the identifier */
+ };
+ __u8 v8[8]; /* raw views of the same eight bytes follow */
+ __be16 v16[4];
+ __be32 v32[2];
+ __be64 v64;
+ };
+};
+
+enum { /* eight consecutive values, matching the 3-bit ila_identifier.type field */
+ ILA_ATYPE_IID = 0, /* the one type ila_addr_is_ila() reports as not ILA */
+ ILA_ATYPE_LUID,
+ ILA_ATYPE_VIRT_V4,
+ ILA_ATYPE_VIRT_UNI_V6,
+ ILA_ATYPE_VIRT_MULTI_V6,
+ ILA_ATYPE_RSVD_1,
+ ILA_ATYPE_RSVD_2,
+ ILA_ATYPE_RSVD_3,
+};
+
+#define CSUM_NEUTRAL_FLAG htonl(0x10000000) /* 0x10 in the first byte of a 32-bit word; lines up with csum_neutral in struct ila_identifier */
+
+struct ila_addr { /* overlays an IPv6 address with a locator/identifier pair */
+ union {
+ struct in6_addr addr; /* the same bytes as a plain in6_addr */
+ struct {
+ struct ila_locator loc; /* first 8 bytes */
+ struct ila_identifier ident; /* following 8 bytes */
+ };
+ };
+};
+
+static inline struct ila_addr *ila_a2i(struct in6_addr *addr)
+{
+ return (struct ila_addr *)addr; /* reinterpret in place; nothing is copied */
+}
+
+static inline bool ila_addr_is_ila(struct ila_addr *iaddr)
+{
+ return (iaddr->ident.type != ILA_ATYPE_IID); /* every type except IID counts as ILA */
+}
+
struct ila_params {
- __be64 locator;
- __be64 locator_match;
+ struct ila_locator locator;
+ struct ila_locator locator_match;
__wsum csum_diff;
+ u8 csum_mode;
};
static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
@@ -38,7 +104,15 @@ static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
return csum_partial(diff, sizeof(diff), 0);
}
-void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p);
+static inline bool ila_csum_neutral_set(struct ila_identifier ident)
+{
+ return !!(ident.csum_neutral); /* true when the C-bit is set; ident is taken by value */
+}
+
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
+ bool set_csum_neutral);
+
+void ila_init_saved_csum(struct ila_params *p);
int ila_lwt_init(void);
void ila_lwt_fini(void);
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 30613050e4ca..ec9efbcdad35 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -15,20 +15,52 @@
static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
{
- if (*(__be64 *)&ip6h->daddr == p->locator_match)
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ if (p->locator_match.v64)
return p->csum_diff;
else
- return compute_csum_diff8((__be32 *)&ip6h->daddr,
+ return compute_csum_diff8((__be32 *)&iaddr->loc,
(__be32 *)&p->locator);
}
-void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+static void ila_csum_do_neutral(struct ila_addr *iaddr,
+ struct ila_params *p)
+{
+ __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; /* last 16 bits of the identifier take the adjustment */
+ __wsum diff, fval;
+
+ /* Check if checksum adjust value has been cached: a non-zero
+ * locator_match selects the stored p->csum_diff, otherwise the
+ * difference is computed here from p->locator and the first
+ * eight bytes of the address being rewritten.
+ */
+ if (p->locator_match.v64) {
+ diff = p->csum_diff;
+ } else {
+ diff = compute_csum_diff8((__be32 *)&p->locator,
+ (__be32 *)iaddr);
+ }
+
+ fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
+ CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG); /* presumably compensates for the C-bit flip below -- confirm */
+
+ diff = csum_add(diff, fval);
+
+ *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
+
+ /* Flip the csum-neutral bit. Either we are doing a SIR->ILA
+ * translation with ILA_CSUM_NEUTRAL_MAP as the csum_mode
+ * and the C-bit is not set, or we are doing an ILA->SIR
+ * translation and the C-bit is set.
+ */
+ iaddr->ident.csum_neutral ^= 1;
+}
+
+static void ila_csum_adjust_transport(struct sk_buff *skb,
+ struct ila_params *p)
{
__wsum diff;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
size_t nhoff = sizeof(struct ipv6hdr);
- /* First update checksum */
switch (ip6h->nexthdr) {
case NEXTHDR_TCP:
if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
@@ -68,7 +100,48 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
}
/* Now change destination address */
- *(__be64 *)&ip6h->daddr = p->locator;
+ iaddr->loc = p->locator;
+}
+
+void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
+ bool set_csum_neutral)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ /* First deal with the transport checksum. The handling depends on
+ * whether the C-bit is already set in the destination address;
+ * set_csum_neutral is consulted only when it is.
+ */
+ if (ila_csum_neutral_set(iaddr->ident)) {
+ /* C-bit is set in the identifier, indicating that this
+ * is a locator being translated to a SIR address.
+ * Perform (receiver) checksum-neutral translation,
+ * unless the caller passed set_csum_neutral.
+ */
+ if (!set_csum_neutral)
+ ila_csum_do_neutral(iaddr, p);
+ } else {
+ switch (p->csum_mode) { /* C-bit clear: act on the configured checksum mode */
+ case ILA_CSUM_ADJUST_TRANSPORT:
+ ila_csum_adjust_transport(skb, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP:
+ ila_csum_do_neutral(iaddr, p);
+ break;
+ case ILA_CSUM_NO_ACTION: /* nothing is adjusted in this mode */
+ break;
+ }
+ }
+
+ /* Now change destination address: only the locator half is overwritten */
+ iaddr->loc = p->locator;
+}
+
+void ila_init_saved_csum(struct ila_params *p)
+{
+ if (!p->locator_match.v64) /* zero locator_match: nothing to precompute */
+ return;
+
+ p->csum_diff = compute_csum_diff8( /* stored so translation can reuse it instead of recomputing */
+ (__be32 *)&p->locator,
+ (__be32 *)&p->locator_match);
}
static int __init ila_init(void)
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 41f18de5dcc2..c8314c6b6154 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -26,7 +26,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true);
return dst->lwtstate->orig_output(net, sk, skb);
@@ -42,7 +42,7 @@ static int ila_input(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false);
return dst->lwtstate->orig_input(skb);
@@ -53,6 +53,7 @@ drop:
static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+ [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
};
static int ila_build_state(struct net_device *dev, struct nlattr *nla,
@@ -64,11 +65,28 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
size_t encap_len = sizeof(*p);
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
+ struct ila_addr *iaddr;
int ret;
if (family != AF_INET6)
return -EINVAL;
+ if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) {
+ /* Need to have full locator and at least type field
+ * included in destination
+ */
+ return -EINVAL;
+ }
+
+ iaddr = (struct ila_addr *)&cfg6->fc_dst;
+
+ if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
+ /* Don't allow translation for a non-ILA address or checksum
+ * neutral flag to be set.
+ */
+ return -EINVAL;
+ }
+
ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla,
ila_nl_policy);
if (ret < 0)
@@ -84,16 +102,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
newts->len = encap_len;
p = ila_params_lwtunnel(newts);
- p->locator = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
+ p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
- if (cfg6->fc_dst_len > sizeof(__be64)) {
- /* Precompute checksum difference for translation since we
- * know both the old locator and the new one.
- */
- p->locator_match = *(__be64 *)&cfg6->fc_dst;
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match, (__be32 *)&p->locator);
- }
+ /* Precompute checksum difference for translation since we
+ * know both the old locator and the new one.
+ */
+ p->locator_match = iaddr->loc;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator_match, (__be32 *)&p->locator);
+
+ if (tb[ILA_ATTR_CSUM_MODE])
+ p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
+
+ ila_init_saved_csum(p);
newts->type = LWTUNNEL_ENCAP_ILA;
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
@@ -109,7 +130,10 @@ static int ila_fill_encap_info(struct sk_buff *skb,
{
struct ila_params *p = ila_params_lwtunnel(lwtstate);
- if (nla_put_u64(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator))
+ if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
+ ILA_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
goto nla_put_failure;
return 0;
@@ -120,7 +144,9 @@ nla_put_failure:
static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
{
- return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */
+ return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
+ 0;
}
static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
@@ -128,7 +154,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
struct ila_params *a_p = ila_params_lwtunnel(a);
struct ila_params *b_p = ila_params_lwtunnel(b);
- return (a_p->locator != b_p->locator);
+ return (a_p->locator.v64 != b_p->locator.v64);
}
static const struct lwtunnel_encap_ops ila_encap_ops = {
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 295ca29a23c3..e6eca5fdf4c9 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -11,13 +11,11 @@
struct ila_xlat_params {
struct ila_params ip;
- __be64 identifier;
int ifindex;
- unsigned int dir;
};
struct ila_map {
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
struct rhash_head node;
struct ila_map __rcu *next;
struct rcu_head rcu;
@@ -66,31 +64,29 @@ static __always_inline void __ila_hash_secret_init(void)
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static inline u32 ila_identifier_hash(__be64 identifier)
+static inline u32 ila_locator_hash(struct ila_locator loc)
{
- u32 *v = (u32 *)&identifier;
+ u32 *v = (u32 *)loc.v32;
return jhash_2words(v[0], v[1], hashrnd);
}
-static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier)
+static inline spinlock_t *ila_get_lock(struct ila_net *ilan,
+ struct ila_locator loc)
{
- return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask];
+ return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask];
}
-static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc,
- int ifindex, unsigned int dir)
+static inline int ila_cmp_wildcards(struct ila_map *ila,
+ struct ila_addr *iaddr, int ifindex)
{
- return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) ||
- (ila->p.ifindex && ila->p.ifindex != ifindex) ||
- !(ila->p.dir & dir);
+ return (ila->xp.ifindex && ila->xp.ifindex != ifindex);
}
-static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p)
+static inline int ila_cmp_params(struct ila_map *ila,
+ struct ila_xlat_params *xp)
{
- return (ila->p.ip.locator_match != p->ip.locator_match) ||
- (ila->p.ifindex != p->ifindex) ||
- (ila->p.dir != p->dir);
+ return (ila->xp.ifindex != xp->ifindex);
}
static int ila_cmpfn(struct rhashtable_compare_arg *arg,
@@ -98,17 +94,14 @@ static int ila_cmpfn(struct rhashtable_compare_arg *arg,
{
const struct ila_map *ila = obj;
- return (ila->p.identifier != *(__be64 *)arg->key);
+ return (ila->xp.ip.locator_match.v64 != *(__be64 *)arg->key);
}
static inline int ila_order(struct ila_map *ila)
{
int score = 0;
- if (ila->p.ip.locator_match)
- score += 1 << 0;
-
- if (ila->p.ifindex)
+ if (ila->xp.ifindex)
score += 1 << 1;
return score;
@@ -117,7 +110,7 @@ static inline int ila_order(struct ila_map *ila)
static const struct rhashtable_params rht_params = {
.nelem_hint = 1024,
.head_offset = offsetof(struct ila_map, node),
- .key_offset = offsetof(struct ila_map, p.identifier),
+ .key_offset = offsetof(struct ila_map, xp.ip.locator_match),
.key_len = sizeof(u64), /* identifier */
.max_size = 1048576,
.min_size = 256,
@@ -136,50 +129,45 @@ static struct genl_family ila_nl_family = {
};
static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
- [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, },
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
- [ILA_ATTR_DIR] = { .type = NLA_U32, },
+ [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
- struct ila_xlat_params *p)
+ struct ila_xlat_params *xp)
{
- memset(p, 0, sizeof(*p));
-
- if (info->attrs[ILA_ATTR_IDENTIFIER])
- p->identifier = (__force __be64)nla_get_u64(
- info->attrs[ILA_ATTR_IDENTIFIER]);
+ memset(xp, 0, sizeof(*xp));
if (info->attrs[ILA_ATTR_LOCATOR])
- p->ip.locator = (__force __be64)nla_get_u64(
+ xp->ip.locator.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR]);
if (info->attrs[ILA_ATTR_LOCATOR_MATCH])
- p->ip.locator_match = (__force __be64)nla_get_u64(
+ xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR_MATCH]);
- if (info->attrs[ILA_ATTR_IFINDEX])
- p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
+ if (info->attrs[ILA_ATTR_CSUM_MODE])
+ xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
- if (info->attrs[ILA_ATTR_DIR])
- p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]);
+ if (info->attrs[ILA_ATTR_IFINDEX])
+ xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
return 0;
}
/* Must be called with rcu readlock */
-static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
+static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr,
int ifindex,
- unsigned int dir,
struct ila_net *ilan)
{
struct ila_map *ila;
- ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params);
+ ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc,
+ rht_params);
while (ila) {
- if (!ila_cmp_wildcards(ila, loc, ifindex, dir))
+ if (!ila_cmp_wildcards(ila, iaddr, ifindex))
return ila;
ila = rcu_access_pointer(ila->next);
}
@@ -188,15 +176,16 @@ static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc,
}
/* Must be called with rcu readlock */
-static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p,
+static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp,
struct ila_net *ilan)
{
struct ila_map *ila;
- ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+ ila = rhashtable_lookup_fast(&ilan->rhash_table,
+ &xp->ip.locator_match,
rht_params);
while (ila) {
- if (!ila_cmp_params(ila, p))
+ if (!ila_cmp_params(ila, xp))
return ila;
ila = rcu_access_pointer(ila->next);
}
@@ -221,14 +210,14 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb, int dir);
+static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral);
static unsigned int
ila_nf_input(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- ila_xlat_addr(skb, ILA_DIR_IN);
+ ila_xlat_addr(skb, false);
return NF_ACCEPT;
}
@@ -241,11 +230,11 @@ static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
},
};
-static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
+static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_map *ila, *head;
- spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+ spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = 0, order;
if (!ilan->hooks_registered) {
@@ -264,22 +253,16 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
if (!ila)
return -ENOMEM;
- ila->p = *p;
+ ila_init_saved_csum(&xp->ip);
- if (p->ip.locator_match) {
- /* Precompute checksum difference for translation since we
- * know both the old identifier and the new one.
- */
- ila->p.ip.csum_diff = compute_csum_diff8(
- (__be32 *)&p->ip.locator_match,
- (__be32 *)&p->ip.locator);
- }
+ ila->xp = *xp;
order = ila_order(ila);
spin_lock(lock);
- head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier,
+ head = rhashtable_lookup_fast(&ilan->rhash_table,
+ &xp->ip.locator_match,
rht_params);
if (!head) {
/* New entry for the rhash_table */
@@ -289,7 +272,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *p)
struct ila_map *tila = head, *prev = NULL;
do {
- if (!ila_cmp_params(tila, p)) {
+ if (!ila_cmp_params(tila, xp)) {
err = -EEXIST;
goto out;
}
@@ -326,23 +309,23 @@ out:
return err;
}
-static int ila_del_mapping(struct net *net, struct ila_xlat_params *p)
+static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_map *ila, *head, *prev;
- spinlock_t *lock = ila_get_lock(ilan, p->identifier);
+ spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = -ENOENT;
spin_lock(lock);
head = rhashtable_lookup_fast(&ilan->rhash_table,
- &p->identifier, rht_params);
+ &xp->ip.locator_match, rht_params);
ila = head;
prev = NULL;
while (ila) {
- if (ila_cmp_params(ila, p)) {
+ if (ila_cmp_params(ila, xp)) {
prev = ila;
ila = rcu_dereference_protected(ila->next,
lockdep_is_held(lock));
@@ -404,28 +387,28 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
int err;
- err = parse_nl_config(info, &p);
+ err = parse_nl_config(info, &xp);
if (err)
return err;
- ila_del_mapping(net, &p);
+ ila_del_mapping(net, &xp);
return 0;
}
static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
{
- if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER,
- (__force u64)ila->p.identifier) ||
- nla_put_u64(msg, ILA_ATTR_LOCATOR,
- (__force u64)ila->p.ip.locator) ||
- nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH,
- (__force u64)ila->p.ip.locator_match) ||
- nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) ||
- nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir))
+ if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR,
+ (__force u64)ila->xp.ip.locator.v64,
+ ILA_ATTR_PAD) ||
+ nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR_MATCH,
+ (__force u64)ila->xp.ip.locator_match.v64,
+ ILA_ATTR_PAD) ||
+ nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
+ /* csum_mode is a u8 and ila_nl_policy declares this attribute
+ * NLA_U8, so emit one byte rather than a 32-bit payload.
+ */
+ nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
return -1; /* any attribute that could not be added fails the whole fill */
return 0;
@@ -457,11 +440,11 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
struct net *net = genl_info_net(info);
struct ila_net *ilan = net_generic(net, ila_net_id);
struct sk_buff *msg;
- struct ila_xlat_params p;
+ struct ila_xlat_params xp;
struct ila_map *ila;
int ret;
- ret = parse_nl_config(info, &p);
+ ret = parse_nl_config(info, &xp);
if (ret)
return ret;
@@ -471,7 +454,7 @@ static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
rcu_read_lock();
- ila = ila_lookup_by_params(&p, ilan);
+ ila = ila_lookup_by_params(&xp, ilan);
if (ila) {
ret = ila_dump_info(ila,
info->snd_portid,
@@ -501,7 +484,8 @@ static int ila_nl_dump_start(struct netlink_callback *cb)
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
- return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter);
+ return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter,
+ GFP_KERNEL);
}
static int ila_nl_dump_done(struct netlink_callback *cb)
@@ -613,45 +597,32 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb, int dir)
+static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
struct ila_net *ilan = net_generic(net, ila_net_id);
- __be64 identifier, locator_match;
- size_t nhoff;
+ struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
/* Assumes skb contains a valid IPv6 header that is pulled */
- identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8];
- locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0];
- nhoff = sizeof(struct ipv6hdr);
+ if (!ila_addr_is_ila(iaddr)) {
+ /* Type indicates this is not an ILA address */
+ return 0;
+ }
rcu_read_lock();
- ila = ila_lookup_wildcards(identifier, locator_match,
- skb->dev->ifindex, dir, ilan);
+ ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
- update_ipv6_locator(skb, &ila->p.ip);
+ ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral);
rcu_read_unlock();
return 0;
}
-int ila_xlat_incoming(struct sk_buff *skb)
-{
- return ila_xlat_addr(skb, ILA_DIR_IN);
-}
-EXPORT_SYMBOL(ila_xlat_incoming);
-
-int ila_xlat_outgoing(struct sk_buff *skb)
-{
- return ila_xlat_addr(skb, ILA_DIR_OUT);
-}
-EXPORT_SYMBOL(ila_xlat_outgoing);
-
int ila_xlat_init(void)
{
int ret;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 70f2628be6fa..00cf28ad4565 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -69,7 +69,6 @@ struct sock *__inet6_lookup_established(struct net *net,
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
- rcu_read_lock();
begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
@@ -90,7 +89,6 @@ begin:
out:
sk = NULL;
found:
- rcu_read_unlock();
return sk;
}
EXPORT_SYMBOL(__inet6_lookup_established);
@@ -122,6 +120,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+/* called with rcu_read_lock() */
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -129,39 +128,27 @@ struct sock *inet6_lookup_listener(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
- struct sock *sk;
- const struct hlist_nulls_node *node;
- struct sock *result;
- int score, hiscore, matches = 0, reuseport = 0;
- bool select_ok = true;
- u32 phash = 0;
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+ int score, hiscore = 0, matches = 0, reuseport = 0;
+ struct sock *sk, *result = NULL;
+ u32 phash = 0;
- rcu_read_lock();
-begin:
- result = NULL;
- hiscore = 0;
- sk_nulls_for_each(sk, node, &ilb->head) {
+ sk_for_each(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif);
if (score > hiscore) {
- hiscore = score;
- result = sk;
reuseport = sk->sk_reuseport;
if (reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
- if (select_ok) {
- struct sock *sk2;
- sk2 = reuseport_select_sock(sk, phash,
- skb, doff);
- if (sk2) {
- result = sk2;
- goto found;
- }
- }
+ result = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ hiscore = score;
} else if (score == hiscore && reuseport) {
matches++;
if (reciprocal_scale(phash, matches) == 0)
@@ -169,25 +156,6 @@ begin:
phash = next_pseudo_random32(phash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
- goto begin;
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
- result = NULL;
- else if (unlikely(compute_score(result, net, hnum, daddr,
- dif) < hiscore)) {
- sock_put(result);
- select_ok = false;
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
@@ -199,12 +167,12 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
const int dif)
{
struct sock *sk;
+ bool refcounted;
- local_bh_disable();
sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- ntohs(dport), dif);
- local_bh_enable();
-
+ ntohs(dport), dif, &refcounted);
+ if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
return sk;
}
EXPORT_SYMBOL_GPL(inet6_lookup);
@@ -254,7 +222,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
sk_nulls_del_node_init_rcu((struct sock *)tw);
- NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+ __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index b2025bf3da4a..c0cbcb259f5a 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
* we accept a checksum of zero here. When we find the socket
* for the UDP packet we'll check if that socket allows zero checksum
* for IPv6 (set by socket option).
+ *
+ * Note, we are only interested in != 0 or == 0, thus the
+ * force to int.
*/
- return skb_checksum_init_zero_check(skb, proto, uh->check,
- ip6_compute_pseudo);
+ return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
}
EXPORT_SYMBOL(udp6_csum_init);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ea071fad67a0..771be1fa4176 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
}
}
+ free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
}
@@ -240,6 +241,7 @@ struct fib6_table *fib6_new_table(struct net *net, u32 id)
return tb;
}
+EXPORT_SYMBOL_GPL(fib6_new_table);
struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index dc2db4f7b182..b912f0dbaf72 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -372,7 +372,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
if (olen > 0) {
struct msghdr msg;
struct flowi6 flowi6;
- int junk;
+ struct sockcm_cookie sockc_junk;
+ struct ipcm6_cookie ipc6;
err = -ENOMEM;
fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
@@ -389,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
msg.msg_control = (void *)(fl->opt+1);
memset(&flowi6, 0, sizeof(flowi6));
- err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
- &junk, &junk, &junk);
+ ipc6.opt = fl->opt;
+ err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk);
if (err)
goto done;
err = -EINVAL;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4e636e60a360..704274cbd495 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -54,6 +54,7 @@
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ip6_tunnel.h>
+#include <net/gre.h>
static bool log_ecn_error = true;
@@ -342,7 +343,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
goto failed_free;
/* Can use a lockless transmit, unless we generate output sequences */
- if (!(nt->parms.o_flags & GRE_SEQ))
+ if (!(nt->parms.o_flags & TUNNEL_SEQ))
dev->features |= NETIF_F_LLTX;
dev_hold(dev);
@@ -443,137 +444,41 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->err_time = jiffies;
}
-static int ip6gre_rcv(struct sk_buff *skb)
+static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
const struct ipv6hdr *ipv6h;
- u8 *h;
- __be16 flags;
- __sum16 csum = 0;
- __be32 key = 0;
- u32 seqno = 0;
struct ip6_tnl *tunnel;
- int offset = 4;
- __be16 gre_proto;
- int err;
-
- if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
- goto drop;
ipv6h = ipv6_hdr(skb);
- h = skb->data;
- flags = *(__be16 *)h;
-
- if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
- /* - Version must be 0.
- - We do not support routing headers.
- */
- if (flags&(GRE_VERSION|GRE_ROUTING))
- goto drop;
-
- if (flags&GRE_CSUM) {
- csum = skb_checksum_simple_validate(skb);
- offset += 4;
- }
- if (flags&GRE_KEY) {
- key = *(__be32 *)(h + offset);
- offset += 4;
- }
- if (flags&GRE_SEQ) {
- seqno = ntohl(*(__be32 *)(h + offset));
- offset += 4;
- }
- }
-
- gre_proto = *(__be16 *)(h + 2);
-
tunnel = ip6gre_tunnel_lookup(skb->dev,
- &ipv6h->saddr, &ipv6h->daddr, key,
- gre_proto);
+ &ipv6h->saddr, &ipv6h->daddr, tpi->key,
+ tpi->proto);
if (tunnel) {
- struct pcpu_sw_netstats *tstats;
-
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
- goto drop;
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, false);
- if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
- tunnel->dev->stats.rx_dropped++;
- goto drop;
- }
-
- skb->protocol = gre_proto;
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IPv6
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
- skb->protocol = htons(ETH_P_IPV6);
- if ((*(h + offset) & 0xF0) != 0x40)
- offset += 4;
- }
-
- skb->mac_header = skb->network_header;
- __pskb_pull(skb, offset);
- skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
-
- if (((flags&GRE_CSUM) && csum) ||
- (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
- tunnel->dev->stats.rx_crc_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
- if (tunnel->parms.i_flags&GRE_SEQ) {
- if (!(flags&GRE_SEQ) ||
- (tunnel->i_seqno &&
- (s32)(seqno - tunnel->i_seqno) < 0)) {
- tunnel->dev->stats.rx_fifo_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
- tunnel->i_seqno = seqno + 1;
- }
-
- /* Warning: All skb pointers will be invalidated! */
- if (tunnel->dev->type == ARPHRD_ETHER) {
- if (!pskb_may_pull(skb, ETH_HLEN)) {
- tunnel->dev->stats.rx_length_errors++;
- tunnel->dev->stats.rx_errors++;
- goto drop;
- }
-
- ipv6h = ipv6_hdr(skb);
- skb->protocol = eth_type_trans(skb, tunnel->dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
- }
-
- __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+ return PACKET_RCVD;
+ }
- skb_reset_network_header(skb);
+ return PACKET_REJECT;
+}
- err = IP6_ECN_decapsulate(ipv6h, skb);
- if (unlikely(err)) {
- if (log_ecn_error)
- net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
- &ipv6h->saddr,
- ipv6_get_dsfield(ipv6h));
- if (err > 1) {
- ++tunnel->dev->stats.rx_frame_errors;
- ++tunnel->dev->stats.rx_errors;
- goto drop;
- }
- }
+static int gre_rcv(struct sk_buff *skb)
+{
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+ int hdr_len;
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
+ hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0);
+ if (hdr_len < 0)
+ goto drop;
- netif_rx(skb);
+ if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
+ goto drop;
+ if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
- }
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
drop:
kfree_skb(skb);
return 0;
@@ -584,187 +489,38 @@ struct ipv6_tel_txoption {
__u8 dst_opt[8];
};
-static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
- memset(opt, 0, sizeof(struct ipv6_tel_txoption));
-
- opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
- opt->dst_opt[3] = 1;
- opt->dst_opt[4] = encap_limit;
- opt->dst_opt[5] = IPV6_TLV_PADN;
- opt->dst_opt[6] = 1;
-
- opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
- opt->ops.opt_nflen = 8;
+ return iptunnel_handle_offloads(skb,
+ csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
-static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
- struct net_device *dev,
- __u8 dsfield,
- struct flowi6 *fl6,
- int encap_limit,
- __u32 *pmtu)
+static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
+ struct net_device *dev, __u8 dsfield,
+ struct flowi6 *fl6, int encap_limit,
+ __u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- struct net *net = tunnel->net;
- struct net_device *tdev; /* Device to other host */
- struct ipv6hdr *ipv6h; /* Our new IP header */
- unsigned int max_headroom = 0; /* The extra header space needed */
- int gre_hlen;
- struct ipv6_tel_txoption opt;
- int mtu;
- struct dst_entry *dst = NULL, *ndst = NULL;
- struct net_device_stats *stats = &tunnel->dev->stats;
- int err = -1;
- u8 proto;
- struct sk_buff *new_skb;
- __be16 protocol;
+ __be16 protocol = (dev->type == ARPHRD_ETHER) ?
+ htons(ETH_P_TEB) : proto;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
- if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
- gre_hlen = 0;
- ipv6h = (struct ipv6hdr *)skb->data;
- fl6->daddr = ipv6h->daddr;
- } else {
- gre_hlen = tunnel->hlen;
+ if (dev->header_ops && dev->type == ARPHRD_IP6GRE)
+ fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr;
+ else
fl6->daddr = tunnel->parms.raddr;
- }
-
- if (!fl6->flowi6_mark)
- dst = dst_cache_get(&tunnel->dst_cache);
-
- if (!dst) {
- dst = ip6_route_output(net, NULL, fl6);
-
- if (dst->error)
- goto tx_err_link_failure;
- dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- dst = NULL;
- goto tx_err_link_failure;
- }
- ndst = dst;
- }
-
- tdev = dst->dev;
-
- if (tdev == dev) {
- stats->collisions++;
- net_warn_ratelimited("%s: Local routing loop detected!\n",
- tunnel->parms.name);
- goto tx_err_dst_release;
- }
-
- mtu = dst_mtu(dst) - sizeof(*ipv6h);
- if (encap_limit >= 0) {
- max_headroom += 8;
- mtu -= 8;
- }
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu) {
- *pmtu = mtu;
- err = -EMSGSIZE;
- goto tx_err_dst_release;
- }
- if (tunnel->err_count > 0) {
- if (time_before(jiffies,
- tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
- tunnel->err_count--;
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
- dst_link_failure(skb);
- } else
- tunnel->err_count = 0;
- }
-
- skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
-
- max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
-
- if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
- (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
- new_skb = skb_realloc_headroom(skb, max_headroom);
- if (max_headroom > dev->needed_headroom)
- dev->needed_headroom = max_headroom;
- if (!new_skb)
- goto tx_err_dst_release;
-
- if (skb->sk)
- skb_set_owner_w(new_skb, skb->sk);
- consume_skb(skb);
- skb = new_skb;
- }
-
- if (!fl6->flowi6_mark && ndst)
- dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
- skb_dst_set(skb, dst);
-
- proto = NEXTHDR_GRE;
- if (encap_limit >= 0) {
- init_tel_txopt(&opt, encap_limit);
- ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
- }
-
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ /* Push GRE header. */
+ gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
- skb_push(skb, gre_hlen);
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, sizeof(*ipv6h));
-
- /*
- * Push down and install the IP header.
- */
- ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
- ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
- ipv6h->hop_limit = tunnel->parms.hop_limit;
- ipv6h->nexthdr = proto;
- ipv6h->saddr = fl6->saddr;
- ipv6h->daddr = fl6->daddr;
-
- ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
- protocol = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : skb->protocol;
- ((__be16 *)(ipv6h + 1))[1] = protocol;
-
- if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
-
- if (tunnel->parms.o_flags&GRE_SEQ) {
- ++tunnel->o_seqno;
- *ptr = htonl(tunnel->o_seqno);
- ptr--;
- }
- if (tunnel->parms.o_flags&GRE_KEY) {
- *ptr = tunnel->parms.o_key;
- ptr--;
- }
- if (tunnel->parms.o_flags&GRE_CSUM) {
- *ptr = 0;
- *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
- skb->len - sizeof(struct ipv6hdr));
- }
- }
-
- skb_set_inner_protocol(skb, protocol);
-
- ip6tunnel_xmit(NULL, skb, dev);
- return 0;
-tx_err_link_failure:
- stats->tx_carrier_errors++;
- dst_link_failure(skb);
-tx_err_dst_release:
- dst_release(dst);
- return err;
+ return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
+ NEXTHDR_GRE);
}
static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
@@ -783,7 +539,6 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_GRE;
dsfield = ipv4_get_dsfield(iph);
@@ -793,7 +548,12 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ if (err)
+ return -1;
+
+ err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ skb->protocol);
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
@@ -833,7 +593,6 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_GRE;
dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@ -843,7 +602,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
+ return -1;
+
+ err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit,
+ &mtu, skb->protocol);
if (err != 0) {
if (err == -EMSGSIZE)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -887,7 +650,11 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
fl6.flowi6_proto = skb->protocol;
- err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
+ err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ if (err)
+ return err;
+
+ err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol);
return err;
}
@@ -931,7 +698,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
- int addend = sizeof(struct ipv6hdr) + 4;
+ int t_hlen;
if (dev->type != ARPHRD_ETHER) {
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -943,6 +710,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
fl6->daddr = p->raddr;
fl6->flowi6_oif = p->link;
fl6->flowlabel = 0;
+ fl6->flowi6_proto = IPPROTO_GRE;
if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
@@ -958,16 +726,11 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
else
dev->flags &= ~IFF_POINTOPOINT;
- /* Precalculate GRE options length */
- if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
- if (t->parms.o_flags&GRE_CSUM)
- addend += 4;
- if (t->parms.o_flags&GRE_KEY)
- addend += 4;
- if (t->parms.o_flags&GRE_SEQ)
- addend += 4;
- }
- t->hlen = addend;
+ t->tun_hlen = gre_calc_hlen(t->parms.o_flags);
+
+ t->hlen = t->encap_hlen + t->tun_hlen;
+
+ t_hlen = t->hlen + sizeof(struct ipv6hdr);
if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
@@ -981,12 +744,15 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
return;
if (rt->dst.dev) {
- dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
+ dev->hard_header_len = rt->dst.dev->hard_header_len +
+ t_hlen;
if (set_mtu) {
- dev->mtu = rt->dst.dev->mtu - addend;
+ dev->mtu = rt->dst.dev->mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ if (dev->type == ARPHRD_ETHER)
+ dev->mtu -= ETH_HLEN;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
@@ -1028,8 +794,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
p->link = u->link;
p->i_key = u->i_key;
p->o_key = u->o_key;
- p->i_flags = u->i_flags;
- p->o_flags = u->o_flags;
+ p->i_flags = gre_flags_to_tnl_flags(u->i_flags);
+ p->o_flags = gre_flags_to_tnl_flags(u->o_flags);
memcpy(p->name, u->name, sizeof(u->name));
}
@@ -1046,8 +812,8 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
u->link = p->link;
u->i_key = p->i_key;
u->o_key = p->o_key;
- u->i_flags = p->i_flags;
- u->o_flags = p->o_flags;
+ u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
+ u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
memcpy(u->name, p->name, sizeof(u->name));
}
@@ -1061,6 +827,8 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
struct net *net = t->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ memset(&p1, 0, sizeof(p1));
+
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ign->fb_tunnel_dev) {
@@ -1160,15 +928,6 @@ done:
return err;
}
-static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len)
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-
static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
const void *daddr, const void *saddr, unsigned int len)
@@ -1212,7 +971,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_uninit = ip6gre_tunnel_uninit,
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_do_ioctl = ip6gre_tunnel_ioctl,
- .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1228,17 +987,11 @@ static void ip6gre_dev_free(struct net_device *dev)
static void ip6gre_tunnel_setup(struct net_device *dev)
{
- struct ip6_tnl *t;
-
dev->netdev_ops = &ip6gre_netdev_ops;
dev->destructor = ip6gre_dev_free;
dev->type = ARPHRD_IP6GRE;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
- dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
- t = netdev_priv(dev);
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
+
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
netif_keep_dst(dev);
@@ -1248,6 +1001,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
{
struct ip6_tnl *tunnel;
int ret;
+ int t_hlen;
tunnel = netdev_priv(dev);
@@ -1266,6 +1020,17 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
return ret;
}
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ dev->mtu -= ETH_HLEN;
+ if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+
return 0;
}
@@ -1304,7 +1069,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
static struct inet6_protocol ip6gre_protocol __read_mostly = {
- .handler = ip6gre_rcv,
+ .handler = gre_rcv,
.err_handler = ip6gre_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
@@ -1448,10 +1213,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ parms->i_flags = gre_flags_to_tnl_flags(
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ parms->o_flags = gre_flags_to_tnl_flags(
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1487,6 +1254,8 @@ static int ip6gre_tap_init(struct net_device *dev)
if (ret)
return ret;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
tunnel = netdev_priv(dev);
ip6gre_tnl_link_config(tunnel, 1);
@@ -1500,11 +1269,16 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
- .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
+#define GRE6_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
static void ip6gre_tap_setup(struct net_device *dev)
{
@@ -1515,6 +1289,40 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+}
+
+static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_GRE_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
+ }
+
+ return ret;
}
static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
@@ -1523,9 +1331,18 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
struct ip6_tnl *nt;
struct net *net = dev_net(dev);
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct ip_tunnel_encap ipencap;
int err;
nt = netdev_priv(dev);
+
+ if (ip6gre_netlink_encap_parms(data, &ipencap)) {
+ int err = ip6_tnl_encap_setup(nt, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
+
ip6gre_netlink_parms(data, &nt->parms);
if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
@@ -1538,9 +1355,25 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
nt->net = dev_net(dev);
ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
- /* Can use a lockless transmit, unless we generate output sequences */
- if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= GRE6_FEATURES;
+ dev->hw_features |= GRE6_FEATURES;
+
+ if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
+ /* TCP offload with GRE SEQ is not supported, nor
+ * can we support 2 levels of outer headers requiring
+ * an update.
+ */
+ if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
+ (nt->encap.type == TUNNEL_ENCAP_NONE)) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+
+ /* Can use a lockless transmit, unless we generate
+ * output sequences
+ */
dev->features |= NETIF_F_LLTX;
+ }
err = register_netdevice(dev);
if (err)
@@ -1560,10 +1393,18 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
struct net *net = nt->net;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
struct __ip6_tnl_parm p;
+ struct ip_tunnel_encap ipencap;
if (dev == ign->fb_tunnel_dev)
return -EINVAL;
+ if (ip6gre_netlink_encap_parms(data, &ipencap)) {
+ int err = ip6_tnl_encap_setup(nt, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
+
ip6gre_netlink_parms(data, &p);
t = ip6gre_tunnel_locate(net, &p, 0);
@@ -1609,14 +1450,20 @@ static size_t ip6gre_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) +
/* IFLA_GRE_TTL */
nla_total_size(1) +
- /* IFLA_GRE_TOS */
- nla_total_size(1) +
/* IFLA_GRE_ENCAP_LIMIT */
nla_total_size(1) +
/* IFLA_GRE_FLOWINFO */
nla_total_size(4) +
/* IFLA_GRE_FLAGS */
nla_total_size(4) +
+ /* IFLA_GRE_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -1626,18 +1473,30 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct __ip6_tnl_parm *p = &t->parms;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
- nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
- nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS,
+ gre_tnl_flags_to_gre_flags(p->o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) ||
nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
- /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
+ t->encap.type) ||
+ nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
+ t->encap.sport) ||
+ nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
+ t->encap.dport) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
+ t->encap.flags))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -1656,6 +1515,10 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
[IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
[IFLA_GRE_FLAGS] = { .type = NLA_U32 },
+ [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 14dacc544c3e..713676f14a0e 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -39,7 +39,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (!send)
goto out;
- send(skb, type, code, info);
+ send(skb, type, code, info, NULL);
out:
rcu_read_unlock();
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c05c425c2389..aacfb4bce153 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,6 +49,13 @@
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ /* if ingress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip6_rcv(skb);
+ if (!skb)
+ return NET_RX_SUCCESS;
+
if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
@@ -78,11 +85,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
idev = __in6_dev_get(skb->dev);
- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len);
+ __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -109,10 +116,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->version != 6)
goto err;
- IP6_ADD_STATS_BH(net, idev,
- IPSTATS_MIB_NOECTPKTS +
+ __IP6_ADD_STATS(net, idev,
+ IPSTATS_MIB_NOECTPKTS +
(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
- max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
+ max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/*
* RFC4291 2.5.3
* A packet received on an interface with a destination address
@@ -169,12 +176,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
/* pkt_len may be zero if Jumbo payload option is present */
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
- IP6_INC_STATS_BH(net,
- idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ __IP6_INC_STATS(net,
+ idev, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
goto drop;
}
hdr = ipv6_hdr(skb);
@@ -182,7 +189,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->nexthdr == NEXTHDR_HOP) {
if (ipv6_parse_hopopts(skb) < 0) {
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
rcu_read_unlock();
return NET_RX_DROP;
}
@@ -197,7 +204,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
net, NULL, skb, dev, NULL,
ip6_rcv_finish);
err:
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
drop:
rcu_read_unlock();
kfree_skb(skb);
@@ -216,6 +223,7 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk
unsigned int nhoff;
int nexthdr;
bool raw;
+ bool have_final = false;
/*
* Parse extension headers
@@ -229,14 +237,27 @@ resubmit:
nhoff = IP6CB(skb)->nhoff;
nexthdr = skb_network_header(skb)[nhoff];
+resubmit_final:
raw = raw6_local_deliver(skb, nexthdr);
ipprot = rcu_dereference(inet6_protos[nexthdr]);
if (ipprot) {
int ret;
- if (ipprot->flags & INET6_PROTO_FINAL) {
+ if (have_final) {
+ if (!(ipprot->flags & INET6_PROTO_FINAL)) {
+ /* Once we've seen a final protocol don't
+ * allow encapsulation on any non-final
+ * ones. This allows foo in UDP encapsulation
+ * to work.
+ */
+ goto discard;
+ }
+ } else if (ipprot->flags & INET6_PROTO_FINAL) {
const struct ipv6hdr *hdr;
+ /* Only do this once for first final protocol */
+ have_final = true;
+
/* Free reference early: we don't need it any more,
and it may hold ip_conntrack module loaded
indefinitely. */
@@ -256,21 +277,32 @@ resubmit:
goto discard;
ret = ipprot->handler(skb);
- if (ret > 0)
- goto resubmit;
- else if (ret == 0)
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+ if (ret > 0) {
+ if (ipprot->flags & INET6_PROTO_FINAL) {
+ /* Not an extension header, most likely UDP
+ * encapsulation. Use return value as nexthdr
+ * protocol not nhoff (which presumably is
+ * not set by handler).
+ */
+ nexthdr = ret;
+ goto resubmit_final;
+ } else {
+ goto resubmit;
+ }
+ } else if (ret == 0) {
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
+ }
} else {
if (!raw) {
if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- IP6_INC_STATS_BH(net, idev,
- IPSTATS_MIB_INUNKNOWNPROTOS);
+ __IP6_INC_STATS(net, idev,
+ IPSTATS_MIB_INUNKNOWNPROTOS);
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff);
}
kfree_skb(skb);
} else {
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
}
}
@@ -278,7 +310,7 @@ resubmit:
return 0;
discard:
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
rcu_read_unlock();
kfree_skb(skb);
return 0;
@@ -291,13 +323,14 @@ int ip6_input(struct sk_buff *skb)
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_input_finish);
}
+EXPORT_SYMBOL_GPL(ip6_input);
int ip6_mc_input(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
bool deliver;
- IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
+ __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
skb->len);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 82e9f3076028..22e90e56b5a9 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -16,6 +16,7 @@
#include <net/protocol.h>
#include <net/ipv6.h>
+#include <net/inet_common.h>
#include "ip6_offload.h"
@@ -63,27 +64,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
int proto;
struct frag_hdr *fptr;
unsigned int unfrag_ip6hlen;
+ unsigned int payload_len;
u8 *prevhdr;
int offset = 0;
bool encap, udpfrag;
int nhoff;
- if (unlikely(skb_shinfo(skb)->gso_type &
- ~(SKB_GSO_TCPV4 |
- SKB_GSO_UDP |
- SKB_GSO_DODGY |
- SKB_GSO_TCP_ECN |
- SKB_GSO_GRE |
- SKB_GSO_GRE_CSUM |
- SKB_GSO_IPIP |
- SKB_GSO_SIT |
- SKB_GSO_UDP_TUNNEL |
- SKB_GSO_UDP_TUNNEL_CSUM |
- SKB_GSO_TUNNEL_REMCSUM |
- SKB_GSO_TCPV6 |
- 0)))
- goto out;
-
skb_reset_network_header(skb);
nhoff = skb_network_header(skb) - skb_mac_header(skb);
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
@@ -101,7 +87,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
if (skb->encapsulation &&
- skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6))
udpfrag = proto == IPPROTO_UDP && encap;
else
udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
@@ -117,7 +103,13 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
- ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
+ if (skb_is_gso(skb))
+ payload_len = skb_shinfo(skb)->gso_size +
+ SKB_GSO_CB(skb)->data_offset +
+ skb->head - (unsigned char *)(ipv6h + 1);
+ else
+ payload_len = skb->len - nhoff - sizeof(*ipv6h);
+ ipv6h->payload_len = htons(payload_len);
skb->network_header = (u8 *)ipv6h - skb->head;
if (udpfrag) {
@@ -239,10 +231,14 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
NAPI_GRO_CB(p)->flush |= flush;
- /* Clear flush_id, there's really no concept of ID in IPv6. */
- NAPI_GRO_CB(p)->flush_id = 0;
+ /* If the previous IP ID value was based on an atomic
+ * datagram we can overwrite the value and ignore it.
+ */
+ if (NAPI_GRO_CB(skb)->is_atomic)
+ NAPI_GRO_CB(p)->flush_id = 0;
}
+ NAPI_GRO_CB(skb)->is_atomic = true;
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_postpull_rcsum(skb, iph, nlen);
@@ -258,9 +254,11 @@ out:
return pp;
}
-static struct sk_buff **sit_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
{
+ /* Common GRO receive for SIT and IP6IP6 */
+
if (NAPI_GRO_CB(skb)->encap_mark) {
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
@@ -271,6 +269,21 @@ static struct sk_buff **sit_gro_receive(struct sk_buff **head,
return ipv6_gro_receive(head, skb);
}
+static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ /* GRO receive for IPv4-in-IPv6 (IP4IP6); hands off to inet_gro_receive() */
+
+ if (NAPI_GRO_CB(skb)->encap_mark) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+
+ NAPI_GRO_CB(skb)->encap_mark = 1;
+
+ return inet_gro_receive(head, skb);
+}
+
static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
@@ -299,10 +312,24 @@ out_unlock:
static int sit_gro_complete(struct sk_buff *skb, int nhoff)
{
skb->encapsulation = 1;
- skb_shinfo(skb)->gso_type |= SKB_GSO_SIT;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4;
+ return ipv6_gro_complete(skb, nhoff);
+}
+
+static int ip6ip6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ skb->encapsulation = 1;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
return ipv6_gro_complete(skb, nhoff);
}
+static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ skb->encapsulation = 1;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6;
+ return inet_gro_complete(skb, nhoff);
+}
+
static struct packet_offload ipv6_packet_offload __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
.callbacks = {
@@ -315,24 +342,39 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
static const struct net_offload sit_offload = {
.callbacks = {
.gso_segment = ipv6_gso_segment,
- .gro_receive = sit_gro_receive,
+ .gro_receive = sit_ip6ip6_gro_receive,
.gro_complete = sit_gro_complete,
},
};
+static const struct net_offload ip4ip6_offload = {
+ .callbacks = {
+ .gso_segment = inet_gso_segment,
+ .gro_receive = ip4ip6_gro_receive,
+ .gro_complete = ip4ip6_gro_complete,
+ },
+};
+
+static const struct net_offload ip6ip6_offload = {
+ .callbacks = {
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = sit_ip6ip6_gro_receive,
+ .gro_complete = ip6ip6_gro_complete,
+ },
+};
static int __init ipv6_offload_init(void)
{
if (tcpv6_offload_init() < 0)
pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
- if (udp_offload_init() < 0)
- pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
if (ipv6_exthdrs_offload_init() < 0)
pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
dev_add_offload(&ipv6_packet_offload);
inet_add_offload(&sit_offload, IPPROTO_IPV6);
+ inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6);
+ inet6_add_offload(&ip4ip6_offload, IPPROTO_IPIP);
return 0;
}
diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h
index 2e155c651b35..96b40e41ac53 100644
--- a/net/ipv6/ip6_offload.h
+++ b/net/ipv6/ip6_offload.h
@@ -12,7 +12,8 @@
#define __ip6_offload_h
int ipv6_exthdrs_offload_init(void);
-int udp_offload_init(void);
+int udpv6_offload_init(void);
+int udpv6_offload_exit(void);
int tcpv6_offload_init(void);
#endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bc972e7152c7..1dfc402d9ad1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -368,7 +368,7 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
return false;
return true;
@@ -395,8 +395,8 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -427,8 +427,8 @@ int ip6_forward(struct sk_buff *skb)
/* Force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -ETIMEDOUT;
@@ -441,15 +441,15 @@ int ip6_forward(struct sk_buff *skb)
if (proxied > 0)
return ip6_input(skb);
else if (proxied < 0) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
if (!xfrm6_route_forward(skb)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INDISCARDS);
goto drop;
}
dst = skb_dst(skb);
@@ -505,17 +505,17 @@ int ip6_forward(struct sk_buff *skb)
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_INTOOBIGERRORS);
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_FRAGFAILS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_INTOOBIGERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
if (skb_cow(skb, dst->dev->hard_header_len)) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
- IPSTATS_MIB_OUTDISCARDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst),
+ IPSTATS_MIB_OUTDISCARDS);
goto drop;
}
@@ -525,14 +525,14 @@ int ip6_forward(struct sk_buff *skb)
hdr->hop_limit--;
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
net, NULL, skb, skb->dev, dst->dev,
ip6_forward_finish);
error:
- IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -910,6 +910,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
int err;
int flags = 0;
+ if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
+ (!*dst || !(*dst)->error)) {
+ err = l3mdev_get_saddr6(net, sk, fl6);
+ if (err)
+ goto out_err;
+ }
+
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
* the route-specific preferred source forces the
@@ -999,10 +1006,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
return 0;
out_err_release:
- if (err == -ENETUNREACH)
- IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
dst_release(*dst);
*dst = NULL;
+out_err:
+ if (err == -ENETUNREACH)
+ IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
return err;
}
@@ -1071,17 +1079,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
- int err;
dst = ip6_sk_dst_check(sk, dst, fl6);
+ if (!dst)
+ dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
- err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
- if (err)
- return ERR_PTR(err);
- if (final_dst)
- fl6->daddr = *final_dst;
-
- return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
@@ -1182,12 +1185,12 @@ static void ip6_append_data_mtu(unsigned int *mtu,
}
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
- struct inet6_cork *v6_cork,
- int hlimit, int tclass, struct ipv6_txoptions *opt,
+ struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
struct rt6_info *rt, struct flowi6 *fl6)
{
struct ipv6_pinfo *np = inet6_sk(sk);
unsigned int mtu;
+ struct ipv6_txoptions *opt = ipc6->opt;
/*
* setup for corking
@@ -1229,8 +1232,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
dst_hold(&rt->dst);
cork->base.dst = &rt->dst;
cork->fl.u.ip6 = *fl6;
- v6_cork->hop_limit = hlimit;
- v6_cork->tclass = tclass;
+ v6_cork->hop_limit = ipc6->hlimit;
+ v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(&rt->dst);
@@ -1258,7 +1261,8 @@ static int __ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- unsigned int flags, int dontfrag)
+ unsigned int flags, struct ipcm6_cookie *ipc6,
+ const struct sockcm_cookie *sockc)
{
struct sk_buff *skb, *skb_prev = NULL;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
@@ -1297,7 +1301,7 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
- if (cork->length + length > mtu - headersize && dontfrag &&
+ if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_RAW)) {
ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
@@ -1329,7 +1333,7 @@ emsgsize:
csummode = CHECKSUM_PARTIAL;
if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
- sock_tx_timestamp(sk, &tx_flags);
+ sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
@@ -1563,9 +1567,10 @@ error:
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen, int hlimit,
- int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags, int dontfrag)
+ void *from, int length, int transhdrlen,
+ struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
+ struct rt6_info *rt, unsigned int flags,
+ const struct sockcm_cookie *sockc)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1578,12 +1583,12 @@ int ip6_append_data(struct sock *sk,
/*
* setup for corking
*/
- err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
- tclass, opt, rt, fl6);
+ err = ip6_setup_cork(sk, &inet->cork, &np->cork,
+ ipc6, rt, fl6);
if (err)
return err;
- exthdrlen = (opt ? opt->opt_flen : 0);
+ exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
} else {
@@ -1593,7 +1598,7 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
&np->cork, sk_page_frag(sk), getfrag,
- from, length, transhdrlen, flags, dontfrag);
+ from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -1749,15 +1754,14 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- int hlimit, int tclass,
- struct ipv6_txoptions *opt, struct flowi6 *fl6,
+ struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
- int dontfrag)
+ const struct sockcm_cookie *sockc)
{
struct inet_cork_full cork;
struct inet6_cork v6_cork;
struct sk_buff_head queue;
- int exthdrlen = (opt ? opt->opt_flen : 0);
+ int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
int err;
if (flags & MSG_PROBE)
@@ -1769,17 +1773,17 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.addr = 0;
cork.base.opt = NULL;
v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
+ err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
if (err)
return ERR_PTR(err);
- if (dontfrag < 0)
- dontfrag = inet6_sk(sk)->dontfrag;
+ if (ipc6->dontfrag < 0)
+ ipc6->dontfrag = inet6_sk(sk)->dontfrag;
err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
- flags, dontfrag);
+ flags, ipc6, sockc);
if (err) {
__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
return ERR_PTR(err);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1f20345cbc97..888543debe4e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -238,6 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
+ gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
@@ -753,97 +754,157 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
}
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
-/**
- * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
- * @skb: received socket buffer
- * @protocol: ethernet protocol ID
- * @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
- *
- * Return: 0
- **/
-
-static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
- __u8 ipproto,
- int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
- const struct ipv6hdr *ipv6h,
- struct sk_buff *skb))
+static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi,
+ struct metadata_dst *tun_dst,
+ int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb),
+ bool log_ecn_err)
{
- struct ip6_tnl *t;
+ struct pcpu_sw_netstats *tstats;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- u8 tproto;
int err;
- rcu_read_lock();
- t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
- if (t) {
- struct pcpu_sw_netstats *tstats;
+ if ((!(tpi->flags & TUNNEL_CSUM) &&
+ (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
+ ((tpi->flags & TUNNEL_CSUM) &&
+ !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
+ tunnel->dev->stats.rx_crc_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
- tproto = ACCESS_ONCE(t->parms.proto);
- if (tproto != ipproto && tproto != 0) {
- rcu_read_unlock();
- goto discard;
+ if (tunnel->parms.i_flags & TUNNEL_SEQ) {
+ if (!(tpi->flags & TUNNEL_SEQ) ||
+ (tunnel->i_seqno &&
+ (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
+ tunnel->dev->stats.rx_fifo_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
}
+ tunnel->i_seqno = ntohl(tpi->seq) + 1;
+ }
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- rcu_read_unlock();
- goto discard;
- }
+ skb->protocol = tpi->proto;
- if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
- t->dev->stats.rx_dropped++;
- rcu_read_unlock();
- goto discard;
+ /* Warning: All skb pointers will be invalidated! */
+ if (tunnel->dev->type == ARPHRD_ETHER) {
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ tunnel->dev->stats.rx_length_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
}
- skb->mac_header = skb->network_header;
- skb_reset_network_header(skb);
- skb->protocol = htons(protocol);
- memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
-
- __skb_tunnel_rx(skb, t->dev, t->net);
-
- err = dscp_ecn_decapsulate(t, ipv6h, skb);
- if (unlikely(err)) {
- if (log_ecn_error)
- net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
- &ipv6h->saddr,
- ipv6_get_dsfield(ipv6h));
- if (err > 1) {
- ++t->dev->stats.rx_frame_errors;
- ++t->dev->stats.rx_errors;
- rcu_read_unlock();
- goto discard;
- }
+
+ ipv6h = ipv6_hdr(skb);
+ skb->protocol = eth_type_trans(skb, tunnel->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ } else {
+ skb->dev = tunnel->dev;
+ }
+
+ skb_reset_network_header(skb);
+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
+
+ err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
+ if (unlikely(err)) {
+ if (log_ecn_err)
+ net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
+ &ipv6h->saddr,
+ ipv6_get_dsfield(ipv6h));
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
}
+ }
- tstats = this_cpu_ptr(t->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
- netif_rx(skb);
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
- rcu_read_unlock();
- return 0;
+ gro_cells_receive(&tunnel->gro_cells, skb);
+ return 0;
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
+ const struct tnl_ptk_info *tpi,
+ struct metadata_dst *tun_dst,
+ bool log_ecn_err)
+{
+ return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+ log_ecn_err);
+}
+EXPORT_SYMBOL(ip6_tnl_rcv);
+
+static const struct tnl_ptk_info tpi_v6 = {
+ /* no tunnel info required for ipxip6. */
+ .proto = htons(ETH_P_IPV6),
+};
+
+static const struct tnl_ptk_info tpi_v4 = {
+ /* no tunnel info required for ipxip6. */
+ .proto = htons(ETH_P_IP),
+};
+
+static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
+ const struct tnl_ptk_info *tpi,
+ int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb))
+{
+ struct ip6_tnl *t;
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int ret = -1;
+
+ rcu_read_lock();
+ t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
+
+ if (t) {
+ u8 tproto = ACCESS_ONCE(t->parms.proto);
+
+ if (tproto != ipproto && tproto != 0)
+ goto drop;
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+ if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
+ goto drop;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
+ goto drop;
+ ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
+ log_ecn_error);
}
+
rcu_read_unlock();
- return 1;
-discard:
+ return ret;
+
+drop:
+ rcu_read_unlock();
kfree_skb(skb);
return 0;
}
static int ip4ip6_rcv(struct sk_buff *skb)
{
- return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
- ip4ip6_dscp_ecn_decapsulate);
+ return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
+ ip4ip6_dscp_ecn_decapsulate);
}
static int ip6ip6_rcv(struct sk_buff *skb)
{
- return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
- ip6ip6_dscp_ecn_decapsulate);
+ return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
+ ip6ip6_dscp_ecn_decapsulate);
}
struct ipv6_tel_txoption {
@@ -918,13 +979,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
/**
- * ip6_tnl_xmit2 - encapsulate packet and send
+ * ip6_tnl_xmit - encapsulate packet and send
* @skb: the outgoing socket buffer
* @dev: the outgoing tunnel device
* @dsfield: dscp code for outer header
- * @fl: flow of tunneled packet
+ * @fl6: flow of tunneled packet
* @encap_limit: encapsulation limit
* @pmtu: Path MTU is stored if packet is too big
+ * @proto: next header value
*
* Description:
* Build new header and do some sanity checks on the packet before sending
@@ -936,12 +998,9 @@ EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
* %-EMSGSIZE message too big. return mtu in this case.
**/
-static int ip6_tnl_xmit2(struct sk_buff *skb,
- struct net_device *dev,
- __u8 dsfield,
- struct flowi6 *fl6,
- int encap_limit,
- __u32 *pmtu)
+int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
+ struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
+ __u8 proto)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net *net = t->net;
@@ -951,8 +1010,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
struct dst_entry *dst = NULL, *ndst = NULL;
struct net_device *tdev;
int mtu;
- unsigned int max_headroom = sizeof(struct ipv6hdr);
- u8 proto;
+ unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
+ unsigned int max_headroom = psh_hlen;
int err = -1;
/* NBMA tunnel */
@@ -1005,7 +1064,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
t->parms.name);
goto tx_err_dst_release;
}
- mtu = dst_mtu(dst) - sizeof(*ipv6h);
+ mtu = dst_mtu(dst) - psh_hlen;
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
@@ -1014,12 +1073,23 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
mtu = IPV6_MIN_MTU;
if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len > mtu) {
+ if (skb->len > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
}
+ if (t->err_count > 0) {
+ if (time_before(jiffies,
+ t->err_time + IP6TUNNEL_ERR_TIMEO)) {
+ t->err_count--;
+
+ dst_link_failure(skb);
+ } else {
+ t->err_count = 0;
+ }
+ }
+
skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
/*
@@ -1045,18 +1115,22 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
skb_dst_set(skb, dst);
- skb->transport_header = skb->network_header;
-
- proto = fl6->flowi6_proto;
if (encap_limit >= 0) {
init_tel_txopt(&opt, encap_limit);
ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
}
- if (likely(!skb->encapsulation)) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
+ /* Calculate max headroom for all the headers and adjust
+ * needed_headroom if necessary.
+ */
+ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
+ + dst->header_len + t->hlen;
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
+
+ err = ip6_tnl_encap(skb, t, &proto, fl6);
+ if (err)
+ return err;
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
@@ -1076,6 +1150,7 @@ tx_err_dst_release:
dst_release(dst);
return err;
}
+EXPORT_SYMBOL(ip6_tnl_xmit);
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -1109,7 +1184,13 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
+ return -1;
+
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ IPPROTO_IPIP);
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
@@ -1163,7 +1244,13 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
- err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
+ return -1;
+
+ skb_set_inner_ipproto(skb, IPPROTO_IPV6);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ IPPROTO_IPV6);
if (err != 0) {
if (err == -EMSGSIZE)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -1174,7 +1261,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
}
static netdev_tx_t
-ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
@@ -1208,6 +1295,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
+ int t_hlen;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -1231,6 +1319,10 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
else
dev->flags &= ~IFF_POINTOPOINT;
+ t->tun_hlen = 0;
+ t->hlen = t->encap_hlen + t->tun_hlen;
+ t_hlen = t->hlen + sizeof(struct ipv6hdr);
+
if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
@@ -1244,9 +1336,9 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
if (rt->dst.dev) {
dev->hard_header_len = rt->dst.dev->hard_header_len +
- sizeof(struct ipv6hdr);
+ t_hlen;
- dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr);
+ dev->mtu = rt->dst.dev->mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
@@ -1370,6 +1462,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ memset(&p1, 0, sizeof(p1));
+
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
@@ -1464,8 +1558,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
* %-EINVAL if mtu too small
**/
-static int
-ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
@@ -1481,6 +1574,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
dev->mtu = new_mtu;
return 0;
}
+EXPORT_SYMBOL(ip6_tnl_change_mtu);
int ip6_tnl_get_iflink(const struct net_device *dev)
{
@@ -1490,16 +1584,74 @@ int ip6_tnl_get_iflink(const struct net_device *dev)
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);
+int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops,
+ unsigned int num)
+{
+ if (num >= MAX_IPTUN_ENCAP_OPS)
+ return -ERANGE;
+
+ return !cmpxchg((const struct ip6_tnl_encap_ops **)
+ &ip6tun_encaps[num],
+ NULL, ops) ? 0 : -1;
+}
+EXPORT_SYMBOL(ip6_tnl_encap_add_ops);
+
+int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops,
+ unsigned int num)
+{
+ int ret;
+
+ if (num >= MAX_IPTUN_ENCAP_OPS)
+ return -ERANGE;
+
+ ret = (cmpxchg((const struct ip6_tnl_encap_ops **)
+ &ip6tun_encaps[num],
+ ops, NULL) == ops) ? 0 : -1;
+
+ synchronize_net();
+
+ return ret;
+}
+EXPORT_SYMBOL(ip6_tnl_encap_del_ops);
+
+int ip6_tnl_encap_setup(struct ip6_tnl *t,
+ struct ip_tunnel_encap *ipencap)
+{
+ int hlen;
+
+ memset(&t->encap, 0, sizeof(t->encap));
+
+ hlen = ip6_encap_hlen(ipencap);
+ if (hlen < 0)
+ return hlen;
+
+ t->encap.type = ipencap->type;
+ t->encap.sport = ipencap->sport;
+ t->encap.dport = ipencap->dport;
+ t->encap.flags = ipencap->flags;
+
+ t->encap_hlen = hlen;
+ t->hlen = t->encap_hlen + t->tun_hlen;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
+
static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
- .ndo_start_xmit = ip6_tnl_xmit,
+ .ndo_start_xmit = ip6_tnl_start_xmit,
.ndo_do_ioctl = ip6_tnl_ioctl,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats = ip6_get_stats,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
+#define IPXIPX_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_HW_CSUM)
/**
* ip6_tnl_dev_setup - setup virtual tunnel device
@@ -1511,20 +1663,18 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
static void ip6_tnl_dev_setup(struct net_device *dev)
{
- struct ip6_tnl *t;
-
dev->netdev_ops = &ip6_tnl_netdev_ops;
dev->destructor = ip6_dev_free;
dev->type = ARPHRD_TUNNEL6;
- dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
- dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr);
- t = netdev_priv(dev);
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
+ dev->features |= NETIF_F_LLTX;
netif_keep_dst(dev);
+
+ dev->features |= IPXIPX_FEATURES;
+ dev->hw_features |= IPXIPX_FEATURES;
+
/* This perm addr will be used as interface identifier by IPv6 */
dev->addr_assign_type = NET_ADDR_RANDOM;
eth_random_addr(dev->perm_addr);
@@ -1541,6 +1691,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
int ret;
+ int t_hlen;
t->dev = dev;
t->net = dev_net(dev);
@@ -1549,13 +1700,32 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
return -ENOMEM;
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
- if (ret) {
- free_percpu(dev->tstats);
- dev->tstats = NULL;
- return ret;
- }
+ if (ret)
+ goto free_stats;
+
+ ret = gro_cells_init(&t->gro_cells, dev);
+ if (ret)
+ goto destroy_dst;
+
+ t->tun_hlen = 0;
+ t->hlen = t->encap_hlen + t->tun_hlen;
+ t_hlen = t->hlen + sizeof(struct ipv6hdr);
+
+ dev->type = ARPHRD_TUNNEL6;
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
return 0;
+
+destroy_dst:
+ dst_cache_destroy(&t->dst_cache);
+free_stats:
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+
+ return ret;
}
/**
@@ -1643,13 +1813,55 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
}
+static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_IPTUN_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_IPTUN_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
+ }
+
+ return ret;
+}
+
static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net *net = dev_net(dev);
struct ip6_tnl *nt, *t;
+ struct ip_tunnel_encap ipencap;
nt = netdev_priv(dev);
+
+ if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
+ int err = ip6_tnl_encap_setup(nt, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
+
ip6_tnl_netlink_parms(data, &nt->parms);
t = ip6_tnl_locate(net, &nt->parms, 0);
@@ -1666,10 +1878,17 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
struct __ip6_tnl_parm p;
struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ struct ip_tunnel_encap ipencap;
if (dev == ip6n->fb_tnl_dev)
return -EINVAL;
+ if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
+ int err = ip6_tnl_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
ip6_tnl_netlink_parms(data, &p);
t = ip6_tnl_locate(net, &p, 0);
@@ -1710,6 +1929,14 @@ static size_t ip6_tnl_get_size(const struct net_device *dev)
nla_total_size(4) +
/* IFLA_IPTUN_PROTO */
nla_total_size(1) +
+ /* IFLA_IPTUN_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_IPTUN_ENCAP_DPORT */
+ nla_total_size(2) +
0;
}
@@ -1727,6 +1954,17 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
+ tunnel->encap.type) ||
+ nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
+ tunnel->encap.sport) ||
+ nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
+ tunnel->encap.dport) ||
+ nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
+ tunnel->encap.flags))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -1750,6 +1988,10 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
[IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 },
[IFLA_IPTUN_FLAGS] = { .type = NLA_U32 },
[IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
+ [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d90a11f14040..5bd3afdcc771 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -321,11 +321,9 @@ static int vti6_rcv(struct sk_buff *skb)
goto discard;
}
- XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
-
rcu_read_unlock();
- return xfrm6_rcv(skb);
+ return xfrm6_rcv_tnl(skb, t);
}
rcu_read_unlock();
return -EINVAL;
@@ -340,6 +338,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
+ struct xfrm_mode *inner_mode;
struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
u32 orig_mark = skb->mark;
int ret;
@@ -357,7 +356,19 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
}
x = xfrm_input_state(skb);
- family = x->inner_mode->afinfo->family;
+
+ inner_mode = x->inner_mode;
+
+ if (x->sel.family == AF_UNSPEC) {
+ inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+ if (inner_mode == NULL) {
+ XFRM_INC_STATS(dev_net(skb->dev),
+ LINUX_MIB_XFRMINSTATEMODEERROR);
+ return -EINVAL;
+ }
+ }
+
+ family = inner_mode->afinfo->family;
skb->mark = be32_to_cpu(t->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a10e77103c88..fccb5dd91902 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -921,6 +921,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.maxvif = vifi + 1;
}
}
+ cache->mfc_un.res.lastuse = jiffies;
}
static int mif6_add(struct net *net, struct mr6_table *mrt,
@@ -1074,6 +1075,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
+ c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
c->mfc_un.res.minvif = MAXMIFS;
return c;
}
@@ -1591,14 +1593,15 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
if (likely(mrt->mroute6_sk == NULL)) {
mrt->mroute6_sk = sk;
net->ipv6.devconf_all->mc_forwarding++;
- inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
- NETCONFA_IFINDEX_ALL,
- net->ipv6.devconf_all);
- }
- else
+ } else {
err = -EADDRINUSE;
+ }
write_unlock_bh(&mrt_lock);
+ if (!err)
+ inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
rtnl_unlock();
return err;
@@ -1616,11 +1619,11 @@ int ip6mr_sk_done(struct sock *sk)
write_lock_bh(&mrt_lock);
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
+ write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- write_unlock_bh(&mrt_lock);
mroute_clean_tables(mrt, false);
err = 0;
@@ -1984,10 +1987,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTOCTETS, skb->len);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTFORWDATAGRAMS);
+ __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(net, sk, skb);
}
@@ -2090,6 +2093,7 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
vif = cache->mf6c_parent;
cache->mfc_un.res.pkt++;
cache->mfc_un.res.bytes += skb->len;
+ cache->mfc_un.res.lastuse = jiffies;
if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
@@ -2232,10 +2236,11 @@ int ip6_mr_input(struct sk_buff *skb)
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
struct mfc6_cache *c, struct rtmsg *rtm)
{
- int ct;
- struct rtnexthop *nhp;
- struct nlattr *mp_attr;
struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ unsigned long lastuse;
+ int ct;
/* If cache is unresolved, don't try to parse IIF and OIF */
if (c->mf6c_parent >= MAXMIFS)
@@ -2265,10 +2270,15 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
nla_nest_end(skb, mp_attr);
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
mfcs.mfcs_packets = c->mfc_un.res.pkt;
mfcs.mfcs_bytes = c->mfc_un.res.bytes;
mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+ RTA_PAD))
return -EMSGSIZE;
rtm->rtm_type = RTN_MULTICAST;
@@ -2411,7 +2421,7 @@ static int mr6_msgsize(bool unresolved, int maxvif)
+ nla_total_size(0) /* RTA_MULTIPATH */
+ maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
/* RTA_MFC_STATS */
- + nla_total_size(sizeof(struct rta_mfc_stats))
+ + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
;
return len;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4449ad1f8114..5330262ab673 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -98,7 +98,6 @@ int ip6_ra_control(struct sock *sk, int sel)
return 0;
}
-static
struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
struct ipv6_txoptions *opt)
{
@@ -407,7 +406,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
- opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = rcu_dereference_protected(np->opt,
+ lockdep_sock_is_held(sk));
opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
@@ -471,7 +471,8 @@ sticky_done:
struct ipv6_txoptions *opt = NULL;
struct msghdr msg;
struct flowi6 fl6;
- int junk;
+ struct sockcm_cookie sockc_junk;
+ struct ipcm6_cookie ipc6;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -501,9 +502,9 @@ sticky_done:
msg.msg_controllen = optlen;
msg.msg_control = (void *)(opt+1);
+ ipc6.opt = opt;
- retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
- &junk, &junk);
+ retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk);
if (retv)
goto done;
update:
@@ -1123,7 +1124,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct ipv6_txoptions *opt;
lock_sock(sk);
- opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = rcu_dereference_protected(np->opt,
+ lockdep_sock_is_held(sk));
len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c245895a3d41..fe65cdc28a45 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -73,15 +73,6 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
-/* Set to 3 to get tracing... */
-#define ND_DEBUG 1
-
-#define ND_PRINTK(val, level, fmt, ...) \
-do { \
- if (val <= ND_DEBUG) \
- net_##level##_ratelimited(fmt, ##__VA_ARGS__); \
-} while (0)
-
static u32 ndisc_hash(const void *pkey,
const struct net_device *dev,
__u32 *hash_rnd);
@@ -150,11 +141,10 @@ struct neigh_table nd_tbl = {
};
EXPORT_SYMBOL_GPL(nd_tbl);
-static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
+void __ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data,
+ int data_len, int pad)
{
- int pad = ndisc_addr_option_pad(skb->dev->type);
- int data_len = skb->dev->addr_len;
- int space = ndisc_opt_addr_space(skb->dev);
+ int space = __ndisc_opt_addr_space(data_len, pad);
u8 *opt = skb_put(skb, space);
opt[0] = type;
@@ -171,6 +161,23 @@ static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
if (space > 0)
memset(opt, 0, space);
}
+EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option);
+
+static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type,
+ void *data, u8 icmp6_type)
+{
+ __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len,
+ ndisc_addr_option_pad(skb->dev->type));
+ ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type);
+}
+
+static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb,
+ void *ha,
+ const u8 *ops_data)
+{
+ ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT);
+ ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data);
+}
static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
@@ -185,24 +192,28 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
return cur <= end && cur->nd_opt_type == type ? cur : NULL;
}
-static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
+static inline int ndisc_is_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *opt)
{
return opt->nd_opt_type == ND_OPT_RDNSS ||
- opt->nd_opt_type == ND_OPT_DNSSL;
+ opt->nd_opt_type == ND_OPT_DNSSL ||
+ ndisc_ops_is_useropt(dev, opt->nd_opt_type);
}
-static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
+static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
+ struct nd_opt_hdr *cur,
struct nd_opt_hdr *end)
{
if (!cur || !end || cur >= end)
return NULL;
do {
cur = ((void *)cur) + (cur->nd_opt_len << 3);
- } while (cur < end && !ndisc_is_useropt(cur));
- return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
+ } while (cur < end && !ndisc_is_useropt(dev, cur));
+ return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL;
}
-struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
+ u8 *opt, int opt_len,
struct ndisc_options *ndopts)
{
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
@@ -217,6 +228,8 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
l = nd_opt->nd_opt_len << 3;
if (opt_len < l || l == 0)
return NULL;
+ if (ndisc_ops_parse_options(dev, nd_opt, ndopts))
+ goto next_opt;
switch (nd_opt->nd_opt_type) {
case ND_OPT_SOURCE_LL_ADDR:
case ND_OPT_TARGET_LL_ADDR:
@@ -243,7 +256,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
break;
#endif
default:
- if (ndisc_is_useropt(nd_opt)) {
+ if (ndisc_is_useropt(dev, nd_opt)) {
ndopts->nd_useropts_end = nd_opt;
if (!ndopts->nd_useropts)
ndopts->nd_useropts = nd_opt;
@@ -260,6 +273,7 @@ struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
nd_opt->nd_opt_len);
}
}
+next_opt:
opt_len -= l;
nd_opt = ((void *)nd_opt) + l;
}
@@ -509,7 +523,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
if (!dev->addr_len)
inc_opt = 0;
if (inc_opt)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev,
+ NDISC_NEIGHBOUR_ADVERTISEMENT);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -528,8 +543,8 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
- dev->dev_addr);
-
+ dev->dev_addr,
+ NDISC_NEIGHBOUR_ADVERTISEMENT);
ndisc_send_skb(skb, daddr, src_addr);
}
@@ -574,7 +589,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
if (ipv6_addr_any(saddr))
inc_opt = false;
if (inc_opt)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev,
+ NDISC_NEIGHBOUR_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -590,7 +606,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
if (inc_opt)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
- dev->dev_addr);
+ dev->dev_addr,
+ NDISC_NEIGHBOUR_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
@@ -626,7 +643,7 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
}
#endif
if (send_sllao)
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION);
skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen);
if (!skb)
@@ -641,7 +658,8 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
if (send_sllao)
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
- dev->dev_addr);
+ dev->dev_addr,
+ NDISC_ROUTER_SOLICITATION);
ndisc_send_skb(skb, daddr, saddr);
}
@@ -702,6 +720,15 @@ static int pndisc_is_router(const void *pkey,
return ret;
}
+void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
+ const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type,
+ struct ndisc_options *ndopts)
+{
+ neigh_update(neigh, lladdr, new, flags);
+ /* report ndisc ops about neighbour update */
+ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
+}
+
static void ndisc_recv_ns(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
@@ -738,7 +765,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND options\n");
return;
}
@@ -856,9 +883,10 @@ have_ifp:
neigh = __neigh_lookup(&nd_tbl, saddr, dev,
!inc || lladdr || !dev->addr_len);
if (neigh)
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE,
+ NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
if (neigh || !dev->header_ops) {
ndisc_send_na(dev, saddr, &msg->target, !!is_router,
true, (ifp != NULL && inc), inc);
@@ -911,7 +939,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
idev->cnf.drop_unsolicited_na)
return;
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, warn, "NS: invalid ND option\n");
return;
}
@@ -967,12 +995,13 @@ static void ndisc_recv_na(struct sk_buff *skb)
goto out;
}
- neigh_update(neigh, lladdr,
+ ndisc_update(dev, neigh, lladdr,
msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
(msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
+ (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0),
+ NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts);
if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
/*
@@ -1017,7 +1046,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
goto out;
/* Parse ND options */
- if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) {
ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
goto out;
}
@@ -1031,10 +1060,11 @@ static void ndisc_recv_rs(struct sk_buff *skb)
neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
if (neigh) {
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
- NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
+ NDISC_ROUTER_SOLICITATION, &ndopts);
neigh_release(neigh);
}
out:
@@ -1135,7 +1165,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(opt, optlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) {
ND_PRINTK(2, warn, "RA: invalid ND options\n");
return;
}
@@ -1329,11 +1359,12 @@ skip_linkparms:
goto out;
}
}
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- NEIGH_UPDATE_F_ISROUTER);
+ NEIGH_UPDATE_F_ISROUTER,
+ NDISC_ROUTER_ADVERTISEMENT, &ndopts);
}
if (!ipv6_accept_ra(in6_dev)) {
@@ -1421,7 +1452,8 @@ skip_routeinfo:
struct nd_opt_hdr *p;
for (p = ndopts.nd_useropts;
p;
- p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
+ p = ndisc_next_useropt(skb->dev, p,
+ ndopts.nd_useropts_end)) {
ndisc_ra_useropt(skb, p);
}
}
@@ -1459,7 +1491,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
- if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
+ if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
return;
if (!ndopts.nd_opts_rh) {
@@ -1504,7 +1536,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
struct dst_entry *dst;
struct flowi6 fl6;
int rd_len;
- u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
+ ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
int oif = l3mdev_fib_oif(dev);
bool ret;
@@ -1563,7 +1596,9 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
memcpy(ha_buf, neigh->ha, dev->addr_len);
read_unlock_bh(&neigh->lock);
ha = ha_buf;
- optlen += ndisc_opt_addr_space(dev);
+ optlen += ndisc_redirect_opt_addr_space(dev, neigh,
+ ops_data_buf,
+ &ops_data);
} else
read_unlock_bh(&neigh->lock);
@@ -1594,7 +1629,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
*/
if (ha)
- ndisc_fill_addr_option(buff, ND_OPT_TARGET_LL_ADDR, ha);
+ ndisc_fill_redirect_addr_option(buff, ha, ops_data);
/*
* build redirect option and copy skb over to the new packet.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 86b67b70b626..552fac2f390a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -39,34 +39,12 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv6 packet filter");
-/*#define DEBUG_IP_FIREWALL*/
-/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
-/*#define DEBUG_IP_FIREWALL_USER*/
-
-#ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...) pr_info(format , ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) pr_info(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define IP_NF_ASSERT(x)
#endif
-#if 0
-/* All the better to debug you with... */
-#define static
-#define inline
-#endif
-
void *ip6t_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ip6t, IP6T);
@@ -95,40 +73,23 @@ ip6_packet_match(const struct sk_buff *skb,
unsigned long ret;
const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
-#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
-
- if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
- &ip6info->src), IP6T_INV_SRCIP) ||
- FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
- &ip6info->dst), IP6T_INV_DSTIP)) {
- dprintf("Source or dest mismatch.\n");
-/*
- dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
- ipinfo->smsk.s_addr, ipinfo->src.s_addr,
- ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
- dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
- ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
- ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
+ if (NF_INVF(ip6info, IP6T_INV_SRCIP,
+ ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
+ &ip6info->src)) ||
+ NF_INVF(ip6info, IP6T_INV_DSTIP,
+ ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
+ &ip6info->dst)))
return false;
- }
ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
- dprintf("VIA in mismatch (%s vs %s).%s\n",
- indev, ip6info->iniface,
- ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
+ if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0))
return false;
- }
ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
- dprintf("VIA out mismatch (%s vs %s).%s\n",
- outdev, ip6info->outiface,
- ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
+ if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0))
return false;
- }
/* ... might want to do something with class and flowlabel here ... */
@@ -145,11 +106,6 @@ ip6_packet_match(const struct sk_buff *skb,
}
*fragoff = _frag_off;
- dprintf("Packet protocol %hi ?= %s%hi.\n",
- protohdr,
- ip6info->invflags & IP6T_INV_PROTO ? "!":"",
- ip6info->proto);
-
if (ip6info->proto == protohdr) {
if (ip6info->invflags & IP6T_INV_PROTO)
return false;
@@ -169,16 +125,11 @@ ip6_packet_match(const struct sk_buff *skb,
static bool
ip6_checkentry(const struct ip6t_ip6 *ipv6)
{
- if (ipv6->flags & ~IP6T_F_MASK) {
- duprintf("Unknown flag bits set: %08X\n",
- ipv6->flags & ~IP6T_F_MASK);
+ if (ipv6->flags & ~IP6T_F_MASK)
return false;
- }
- if (ipv6->invflags & ~IP6T_INV_MASK) {
- duprintf("Unknown invflag bits set: %08X\n",
- ipv6->invflags & ~IP6T_INV_MASK);
+ if (ipv6->invflags & ~IP6T_INV_MASK)
return false;
- }
+
return true;
}
@@ -446,20 +397,17 @@ ip6t_do_table(struct sk_buff *skb,
xt_write_recseq_end(addend);
local_bh_enable();
-#ifdef DEBUG_ALLOW_ALL
- return NF_ACCEPT;
-#else
if (acpar.hotdrop)
return NF_DROP;
else return verdict;
-#endif
}
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -480,11 +428,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
= (void *)ip6t_get_target_c(e);
int visited = e->comefrom & (1 << hook);
- if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
- pr_err("iptables: loop hook %u pos %u %08X.\n",
- hook, pos, e->comefrom);
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS))
return 0;
- }
+
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
@@ -496,26 +442,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
if ((strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("mark_source_chains: bad "
- "negative verdict (%i)\n",
- t->verdict);
+ t->verdict < -NF_MAX_VERDICT - 1)
return 0;
- }
/* Return: backtrack through the last
big jump. */
do {
e->comefrom ^= (1<<NF_INET_NUMHOOKS);
-#ifdef DEBUG_IP_FIREWALL_USER
- if (e->comefrom
- & (1 << NF_INET_NUMHOOKS)) {
- duprintf("Back unset "
- "on hook %u "
- "rule %u\n",
- hook, pos);
- }
-#endif
oldpos = pos;
pos = e->counters.pcnt;
e->counters.pcnt = 0;
@@ -532,6 +465,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct ip6t_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -540,19 +475,17 @@ mark_source_chains(const struct xt_table_info *newinfo,
if (strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
- if (newpos > newinfo->size -
- sizeof(struct ip6t_entry)) {
- duprintf("mark_source_chains: "
- "bad verdict (%i)\n",
- newpos);
- return 0;
- }
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
+ e = (struct ip6t_entry *)
+ (entry0 + newpos);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct ip6t_entry *)
(entry0 + newpos);
@@ -560,8 +493,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
pos = newpos;
}
}
-next:
- duprintf("Finished chain %u\n", hook);
+next: ;
}
return 1;
}
@@ -579,41 +511,15 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
module_put(par.match->me);
}
-static int
-check_entry(const struct ip6t_entry *e)
-{
- const struct xt_entry_target *t;
-
- if (!ip6_checkentry(&e->ipv6))
- return -EINVAL;
-
- if (e->target_offset + sizeof(struct xt_entry_target) >
- e->next_offset)
- return -EINVAL;
-
- t = ip6t_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ip6t_ip6 *ipv6 = par->entryinfo;
- int ret;
par->match = m->u.kernel.match;
par->matchinfo = m->data;
- ret = xt_check_match(par, m->u.match_size - sizeof(*m),
- ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
- if (ret < 0) {
- duprintf("ip_tables: check failed for `%s'.\n",
- par.match->name);
- return ret;
- }
- return 0;
+ return xt_check_match(par, m->u.match_size - sizeof(*m),
+ ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
}
static int
@@ -624,10 +530,9 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
+
m->u.kernel.match = match;
ret = check_match(m, par);
@@ -652,17 +557,11 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
.hook_mask = e->comefrom,
.family = NFPROTO_IPV6,
};
- int ret;
t = ip6t_get_target(e);
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
- e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO);
- if (ret < 0) {
- duprintf("ip_tables: check failed for `%s'.\n",
- t->u.kernel.target->name);
- return ret;
- }
- return 0;
+ return xt_check_target(&par, t->u.target_size - sizeof(*t),
+ e->ipv6.proto,
+ e->ipv6.invflags & IP6T_INV_PROTO);
}
static int
@@ -675,10 +574,12 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
unsigned int j;
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ unsigned long pcnt;
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
+ pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(pcnt))
return -ENOMEM;
+ e->counters.pcnt = pcnt;
j = 0;
mtpar.net = net;
@@ -697,7 +598,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
ret = PTR_ERR(target);
goto cleanup_matches;
}
@@ -750,19 +650,18 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
(unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p\n", e);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset
- < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target))
return -EINVAL;
- }
- err = check_entry(e);
+ if (!ip6_checkentry(&e->ipv6))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
if (err)
return err;
@@ -773,12 +672,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
- if (!check_underflow(e)) {
- pr_debug("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ if (!check_underflow(e))
return -EINVAL;
- }
+
newinfo->underflow[h] = underflows[h];
}
}
@@ -818,6 +714,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ip6t_replace *repl)
{
struct ip6t_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -830,7 +727,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
- duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -840,38 +739,35 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- return ret;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ip6t_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
- if (i != repl->num_entries) {
- duprintf("translate_table: %u not %u entries\n",
- i, repl->num_entries);
- return -EINVAL;
- }
+ ret = -EINVAL;
+ if (i != repl->num_entries)
+ goto out_free;
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
/* Only hooks which are valid */
if (!(repl->valid_hooks & (1 << i)))
continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, repl->hook_entry[i]);
- return -EINVAL;
- }
- if (newinfo->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, repl->underflow[i]);
- return -EINVAL;
- }
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF)
+ goto out_free;
+ if (newinfo->underflow[i] == 0xFFFFFFFF)
+ goto out_free;
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -892,6 +788,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void
@@ -1095,11 +994,8 @@ static int get_info(struct net *net, void __user *user,
struct xt_table *t;
int ret;
- if (*len != sizeof(struct ip6t_getinfo)) {
- duprintf("length %u != %zu\n", *len,
- sizeof(struct ip6t_getinfo));
+ if (*len != sizeof(struct ip6t_getinfo))
return -EINVAL;
- }
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
@@ -1157,31 +1053,24 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
struct ip6t_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct ip6t_get_entries) + get.size) {
- duprintf("get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct ip6t_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET6, get.name);
if (!IS_ERR_OR_NULL(t)) {
struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
- else {
- duprintf("get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else
ret = -EAGAIN;
- }
+
module_put(t->me);
xt_table_unlock(t);
} else
@@ -1217,8 +1106,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
/* You lied! */
if (valid_hooks != t->valid_hooks) {
- duprintf("Valid hook crap: %08X vs %08X\n",
- valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
@@ -1228,8 +1115,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto put_module;
/* Update module usage count based on number of rules */
- duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
- oldinfo->number, oldinfo->initial_entries, newinfo->number);
if ((oldinfo->number > oldinfo->initial_entries) ||
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
@@ -1298,8 +1183,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
if (ret != 0)
goto free_newinfo;
- duprintf("ip_tables: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
if (ret)
@@ -1321,55 +1204,16 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
struct ip6t_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
-
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
-
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
- t = xt_find_table_lock(net, AF_INET6, name);
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
+ t = xt_find_table_lock(net, AF_INET6, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1377,7 +1221,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1456,7 +1300,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
static int
compat_find_calc_match(struct xt_entry_match *m,
- const char *name,
const struct ip6t_ip6 *ipv6,
int *size)
{
@@ -1464,11 +1307,9 @@ compat_find_calc_match(struct xt_entry_match *m,
match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
m->u.user.revision);
- if (IS_ERR(match)) {
- duprintf("compat_check_calc_match: `%s' not found\n",
- m->u.user.name);
+ if (IS_ERR(match))
return PTR_ERR(match);
- }
+
m->u.kernel.match = match;
*size += xt_compat_match_offset(match);
return 0;
@@ -1491,35 +1332,29 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_match *ematch;
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
- int ret, off, h;
+ int ret, off;
- duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
(unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
- (unsigned char *)e + e->next_offset > limit) {
- duprintf("Bad offset %p, limit = %p\n", e, limit);
+ (unsigned char *)e + e->next_offset > limit)
return -EINVAL;
- }
if (e->next_offset < sizeof(struct compat_ip6t_entry) +
- sizeof(struct compat_xt_entry_target)) {
- duprintf("checking: element %p size %u\n",
- e, e->next_offset);
+ sizeof(struct compat_xt_entry_target))
+ return -EINVAL;
+
+ if (!ip6_checkentry(&e->ipv6))
return -EINVAL;
- }
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct ip6t_entry *)e);
+ ret = xt_compat_check_entry_offsets(e, e->elems,
+ e->target_offset, e->next_offset);
if (ret)
return ret;
@@ -1527,7 +1362,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name, &e->ipv6, &off);
+ ret = compat_find_calc_match(ematch, &e->ipv6, &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1537,8 +1372,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
t->u.user.revision);
if (IS_ERR(target)) {
- duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
- t->u.user.name);
ret = PTR_ERR(target);
goto release_matches;
}
@@ -1550,17 +1383,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
if (ret)
goto out;
- /* Check hooks & underflows */
- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
out:
@@ -1574,18 +1396,17 @@ release_matches:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct ip6t_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
struct xt_entry_match *ematch;
- ret = 0;
origsize = *size;
de = (struct ip6t_entry *)*dstptr;
memcpy(de, e, sizeof(struct ip6t_entry));
@@ -1594,11 +1415,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
*dstptr += sizeof(struct ip6t_entry);
*size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
- xt_ematch_foreach(ematch, e) {
- ret = xt_compat_match_from_user(ematch, dstptr, size);
- if (ret != 0)
- return ret;
- }
+ xt_ematch_foreach(ematch, e)
+ xt_compat_match_from_user(ematch, dstptr, size);
+
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ip6t_get_target(e);
xt_compat_target_from_user(t, dstptr, size);
@@ -1610,183 +1429,79 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
-}
-
-static int compat_check_entry(struct ip6t_entry *e, struct net *net,
- const char *name)
-{
- unsigned int j;
- int ret = 0;
- struct xt_mtchk_param mtpar;
- struct xt_entry_match *ematch;
-
- e->counters.pcnt = xt_percpu_counter_alloc();
- if (IS_ERR_VALUE(e->counters.pcnt))
- return -ENOMEM;
- j = 0;
- mtpar.net = net;
- mtpar.table = name;
- mtpar.entryinfo = &e->ipv6;
- mtpar.hook_mask = e->comefrom;
- mtpar.family = NFPROTO_IPV6;
- xt_ematch_foreach(ematch, e) {
- ret = check_match(ematch, &mtpar);
- if (ret != 0)
- goto cleanup_matches;
- ++j;
- }
-
- ret = check_target(e, net, name);
- if (ret)
- goto cleanup_matches;
- return 0;
-
- cleanup_matches:
- xt_ematch_foreach(ematch, e) {
- if (j-- == 0)
- break;
- cleanup_match(ematch, net);
- }
-
- xt_percpu_counter_free(e->counters.pcnt);
-
- return ret;
}
static int
translate_compat_table(struct net *net,
- const char *name,
- unsigned int valid_hooks,
struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_ip6t_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_ip6t_entry *iter0;
- struct ip6t_entry *iter1;
+ struct ip6t_replace repl;
unsigned int size;
int ret = 0;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
+ size = compatr->size;
+ info->number = compatr->num_entries;
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
-
- duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
- xt_compat_init_offsets(AF_INET6, number);
+ xt_compat_init_offsets(AF_INET6, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
- duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ if (j != compatr->num_entries)
goto out_unlock;
- }
-
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries;
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone. */
xt_compat_flush_offsets(AF_INET6);
xt_compat_unlock(AF_INET6);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = compat_check_entry(iter1, net, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(ip6t_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1, net);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(net, newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
+
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1794,17 +1509,16 @@ translate_compat_table(struct net *net,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET6);
+ xt_compat_unlock(AF_INET6);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(AF_INET6);
- xt_compat_unlock(AF_INET6);
- goto out;
}
static int
@@ -1820,8 +1534,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -EFAULT;
/* overflow check */
- if (tmp.size >= INT_MAX / num_possible_cpus())
- return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
if (tmp.num_counters == 0)
@@ -1840,15 +1552,10 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
- duprintf("compat_do_replace: Translated table\n");
-
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, compat_ptr(tmp.counters));
if (ret)
@@ -1882,7 +1589,6 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
break;
default:
- duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -1932,19 +1638,15 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
struct compat_ip6t_get_entries get;
struct xt_table *t;
- if (*len < sizeof(get)) {
- duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ if (*len < sizeof(get))
return -EINVAL;
- }
if (copy_from_user(&get, uptr, sizeof(get)) != 0)
return -EFAULT;
- if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) {
- duprintf("compat_get_entries: %u != %zu\n",
- *len, sizeof(get) + get.size);
+ if (*len != sizeof(struct compat_ip6t_get_entries) + get.size)
return -EINVAL;
- }
+
get.name[sizeof(get.name) - 1] = '\0';
xt_compat_lock(AF_INET6);
@@ -1952,16 +1654,13 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
- duprintf("t->private->number = %u\n", private->number);
ret = compat_table_info(private, &info);
- if (!ret && get.size == info.size) {
+ if (!ret && get.size == info.size)
ret = compat_copy_entries_to_user(private->size,
t, uptr->entrytable);
- } else if (!ret) {
- duprintf("compat_get_entries: I've got %u not %u!\n",
- private->size, get.size);
+ else if (!ret)
ret = -EAGAIN;
- }
+
xt_compat_flush_offsets(AF_INET6);
module_put(t->me);
xt_table_unlock(t);
@@ -2014,7 +1713,6 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
break;
default:
- duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -2066,7 +1764,6 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
}
default:
- duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
}
@@ -2168,7 +1865,6 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
- duprintf("Dropping evil ICMP tinygram.\n");
par->hotdrop = true;
return false;
}
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 3deed5860a42..06bed74cf5ee 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -20,15 +20,16 @@
#include <net/netfilter/nf_conntrack_synproxy.h>
static struct ipv6hdr *
-synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
+synproxy_build_ip(struct net *net, struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
{
struct ipv6hdr *iph;
skb_reset_network_header(skb);
iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
ip6_flow_hdr(iph, 0, 0);
- iph->hop_limit = 64; //XXX
+ iph->hop_limit = net->ipv6.devconf_all->hop_limit;
iph->nexthdr = IPPROTO_TCP;
iph->saddr = *saddr;
iph->daddr = *daddr;
@@ -37,13 +38,12 @@ synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
}
static void
-synproxy_send_tcp(const struct synproxy_net *snet,
+synproxy_send_tcp(struct net *net,
const struct sk_buff *skb, struct sk_buff *nskb,
struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
struct ipv6hdr *niph, struct tcphdr *nth,
unsigned int tcp_hdr_size)
{
- struct net *net = nf_ct_net(snet->tmpl);
struct dst_entry *dst;
struct flowi6 fl6;
@@ -60,7 +60,7 @@ synproxy_send_tcp(const struct synproxy_net *snet,
fl6.fl6_dport = nth->dest;
security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
- if (dst == NULL || dst->error) {
+ if (dst->error) {
dst_release(dst);
goto free_nskb;
}
@@ -84,7 +84,7 @@ free_nskb:
}
static void
-synproxy_send_client_synack(const struct synproxy_net *snet,
+synproxy_send_client_synack(struct net *net,
const struct sk_buff *skb, const struct tcphdr *th,
const struct synproxy_options *opts)
{
@@ -103,7 +103,7 @@ synproxy_send_client_synack(const struct synproxy_net *snet,
return;
skb_reserve(nskb, MAX_TCP_HEADER);
- niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+ niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr);
skb_reset_transport_header(nskb);
nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -121,15 +121,16 @@ synproxy_send_client_synack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
niph, nth, tcp_hdr_size);
}
static void
-synproxy_send_server_syn(const struct synproxy_net *snet,
+synproxy_send_server_syn(struct net *net,
const struct sk_buff *skb, const struct tcphdr *th,
const struct synproxy_options *opts, u32 recv_seq)
{
+ struct synproxy_net *snet = synproxy_pernet(net);
struct sk_buff *nskb;
struct ipv6hdr *iph, *niph;
struct tcphdr *nth;
@@ -144,7 +145,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet,
return;
skb_reserve(nskb, MAX_TCP_HEADER);
- niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+ niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr);
skb_reset_transport_header(nskb);
nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -165,12 +166,12 @@ synproxy_send_server_syn(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
niph, nth, tcp_hdr_size);
}
static void
-synproxy_send_server_ack(const struct synproxy_net *snet,
+synproxy_send_server_ack(struct net *net,
const struct ip_ct_tcp *state,
const struct sk_buff *skb, const struct tcphdr *th,
const struct synproxy_options *opts)
@@ -189,7 +190,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet,
return;
skb_reserve(nskb, MAX_TCP_HEADER);
- niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
+ niph = synproxy_build_ip(net, nskb, &iph->daddr, &iph->saddr);
skb_reset_transport_header(nskb);
nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -205,11 +206,11 @@ synproxy_send_server_ack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+ synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
}
static void
-synproxy_send_client_ack(const struct synproxy_net *snet,
+synproxy_send_client_ack(struct net *net,
const struct sk_buff *skb, const struct tcphdr *th,
const struct synproxy_options *opts)
{
@@ -227,7 +228,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
return;
skb_reserve(nskb, MAX_TCP_HEADER);
- niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+ niph = synproxy_build_ip(net, nskb, &iph->saddr, &iph->daddr);
skb_reset_transport_header(nskb);
nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
@@ -243,15 +244,16 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
niph, nth, tcp_hdr_size);
}
static bool
-synproxy_recv_client_ack(const struct synproxy_net *snet,
+synproxy_recv_client_ack(struct net *net,
const struct sk_buff *skb, const struct tcphdr *th,
struct synproxy_options *opts, u32 recv_seq)
{
+ struct synproxy_net *snet = synproxy_pernet(net);
int mss;
mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
@@ -267,7 +269,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet,
if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_check_timestamp_cookie(opts);
- synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ synproxy_send_server_syn(net, skb, th, opts, recv_seq);
return true;
}
@@ -275,7 +277,8 @@ static unsigned int
synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_synproxy_info *info = par->targinfo;
- struct synproxy_net *snet = synproxy_pernet(par->net);
+ struct net *net = par->net;
+ struct synproxy_net *snet = synproxy_pernet(net);
struct synproxy_options opts = {};
struct tcphdr *th, _th;
@@ -304,12 +307,12 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
XT_SYNPROXY_OPT_SACK_PERM |
XT_SYNPROXY_OPT_ECN);
- synproxy_send_client_synack(snet, skb, th, &opts);
+ synproxy_send_client_synack(net, skb, th, &opts);
return NF_DROP;
} else if (th->ack && !(th->fin || th->rst || th->syn)) {
/* ACK from client */
- synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+ synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq));
return NF_DROP;
}
@@ -320,7 +323,8 @@ static unsigned int ipv6_synproxy_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *nhs)
{
- struct synproxy_net *snet = synproxy_pernet(nhs->net);
+ struct net *net = nhs->net;
+ struct synproxy_net *snet = synproxy_pernet(net);
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
struct nf_conn_synproxy *synproxy;
@@ -384,7 +388,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
* therefore we need to add 1 to make the SYN sequence
* number match the one of first SYN.
*/
- if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ if (synproxy_recv_client_ack(net, skb, th, &opts,
ntohl(th->seq) + 1))
this_cpu_inc(snet->stats->cookie_retrans);
@@ -410,12 +414,12 @@ static unsigned int ipv6_synproxy_hook(void *priv,
XT_SYNPROXY_OPT_SACK_PERM);
swap(opts.tsval, opts.tsecr);
- synproxy_send_server_ack(snet, state, skb, th, &opts);
+ synproxy_send_server_ack(net, state, skb, th, &opts);
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
swap(opts.tsval, opts.tsecr);
- synproxy_send_client_ack(snet, skb, th, &opts);
+ synproxy_send_client_ack(net, skb, th, &opts);
consume_skb(skb);
return NF_STOLEN;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index cb2b28883252..2b1a9dcdbcb3 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -83,10 +83,6 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
{
if (state->hook == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, state);
- if (state->hook == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, state,
- state->net->ipv6.ip6table_mangle);
- /* INPUT/FORWARD */
return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
}
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index 6989c70ae29f..4a84b5ad9ecb 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -33,6 +33,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
fl6.daddr = *gw;
fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) |
(iph->flow_lbl[1] << 8) | iph->flow_lbl[2]);
+ fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
dst_release(dst);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 4709f657b7b6..a5400223fd74 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -158,7 +158,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.fl6_dport = otcph->source;
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
- if (dst == NULL || dst->error) {
+ if (dst->error) {
dst_release(dst);
return;
}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 71d995ff3108..2535223ba956 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv,
struct in6_addr saddr, daddr;
u_int8_t hop_limit;
u32 mark, flowlabel;
+ int err;
/* malformed packet, drop it */
if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
@@ -46,13 +47,16 @@ static unsigned int nf_route_table_hook(void *priv,
flowlabel = *((u32 *)ipv6_hdr(skb));
ret = nft_do_chain(&pkt, priv);
- if (ret != NF_DROP && ret != NF_QUEUE &&
+ if (ret != NF_DROP && ret != NF_STOLEN &&
(memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
- flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
- return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP;
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
+ err = ip6_route_me_harder(state->net, skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
return ret;
}
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 533cd5719c59..92bda9908bb9 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -47,6 +47,7 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = {
.eval = nft_reject_ipv6_eval,
.init = nft_reject_init,
.dump = nft_reject_dump,
+ .validate = nft_reject_validate,
};
static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index c382db7a2e73..0e983b694ee8 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -55,13 +55,14 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct icmp6hdr user_icmph;
int addr_type;
struct in6_addr *daddr;
- int iif = 0;
+ int oif = 0;
struct flowi6 fl6;
int err;
- int hlimit;
struct dst_entry *dst;
struct rt6_info *rt;
struct pingfakehdr pfh;
+ struct sockcm_cookie junk = {0};
+ struct ipcm6_cookie ipc6;
pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -77,25 +78,30 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (u->sin6_family != AF_INET6) {
return -EAFNOSUPPORT;
}
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != u->sin6_scope_id) {
- return -EINVAL;
- }
daddr = &(u->sin6_addr);
- iif = u->sin6_scope_id;
+ if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
+ oif = u->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = &sk->sk_v6_daddr;
}
- if (!iif)
- iif = sk->sk_bound_dev_if;
+ if (!oif)
+ oif = sk->sk_bound_dev_if;
+
+ if (!oif)
+ oif = np->sticky_pktinfo.ipi6_ifindex;
+
+ if (!oif && ipv6_addr_is_multicast(daddr))
+ oif = np->mcast_oif;
+ else if (!oif)
+ oif = np->ucast_oif;
addr_type = ipv6_addr_type(daddr);
- if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
- return -EINVAL;
- if (addr_type & IPV6_ADDR_MAPPED)
+ if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
+ (addr_type & IPV6_ADDR_MAPPED) ||
+ (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
return -EINVAL;
/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
@@ -105,15 +111,14 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
+ fl6.flowi6_oif = oif;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
- else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ ipc6.tclass = np->tclass;
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr);
if (IS_ERR(dst))
@@ -121,8 +126,10 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rt = (struct rt6_info *) dst;
np = inet6_sk(sk);
- if (!np)
- return -EBADF;
+ if (!np) {
+ err = -EBADF;
+ goto dst_err_out;
+ }
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
fl6.flowi6_oif = np->mcast_oif;
@@ -138,13 +145,14 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
pfh.wcheck = 0;
pfh.family = AF_INET6;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
- 0, hlimit,
- np->tclass, NULL, &fl6, rt,
- MSG_DONTWAIT, np->dontfrag);
+ 0, &ipc6, &fl6, rt,
+ MSG_DONTWAIT, &junk);
if (err) {
ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
@@ -157,6 +165,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
release_sock(sk);
+dst_err_out:
+ dst_release(dst);
+
if (err)
return err;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fa59dd7a427e..590dd1f7746f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -745,10 +745,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct dst_entry *dst = NULL;
struct raw6_frag_vec rfv;
struct flowi6 fl6;
+ struct sockcm_cookie sockc;
+ struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
- int hlimit = -1;
- int tclass = -1;
- int dontfrag = -1;
u16 proto;
int err;
@@ -769,6 +768,11 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_mark = sk->sk_mark;
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
+ ipc6.opt = NULL;
+
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -821,13 +825,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (fl6.flowi6_oif == 0)
fl6.flowi6_oif = sk->sk_bound_dev_if;
+ sockc.tsflags = sk->sk_tsflags;
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
+ ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -843,7 +848,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!opt) {
opt = txopt_get(np);
opt_to_free = opt;
- }
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -873,19 +878,21 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (inet->hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
+
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto out;
}
- if (hlimit < 0)
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-
- if (tclass < 0)
- tclass = np->tclass;
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
+ if (ipc6.dontfrag < 0)
+ ipc6.dontfrag = np->dontfrag;
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
@@ -894,10 +901,11 @@ back_from_confirm:
if (inet->hdrincl)
err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
else {
+ ipc6.opt = opt;
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
- len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
+ msg->msg_flags, &sockc);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e2ea31175ef9..2160d5d009cb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -145,12 +145,12 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
if (!dev)
goto out_rcu_unlock;
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
if (inet_frag_evicting(&fq->q))
goto out_rcu_unlock;
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
@@ -223,8 +223,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
((u8 *)&fhdr->frag_off -
skb_network_header(skb)));
@@ -258,8 +258,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
/* RFC2460 says always send parameter problem in
* this case. -DaveM
*/
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
offsetof(struct ipv6hdr, payload_len));
return -1;
@@ -361,8 +361,8 @@ found:
discard_fq:
inet_frag_kill(&fq->q, &ip6_frags);
err:
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
}
@@ -500,7 +500,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
skb_network_header_len(head));
rcu_read_lock();
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
fq->q.fragments = NULL;
fq->q.fragments_tail = NULL;
@@ -513,7 +513,7 @@ out_oom:
net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
out_fail:
rcu_read_lock();
- IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
rcu_read_unlock();
return -1;
}
@@ -528,7 +528,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
if (hdr->payload_len == 0)
@@ -544,8 +544,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
if (!(fhdr->frag_off & htons(0xFFF9))) {
/* It is not a fragmented frame */
skb->transport_header += sizeof(struct frag_hdr);
- IP6_INC_STATS_BH(net,
- ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
+ __IP6_INC_STATS(net,
+ ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
@@ -566,13 +566,13 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return ret;
}
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -1;
fail_hdr:
- IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_INHDRERRORS);
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6f32944e0223..e3a224b97905 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1042,8 +1042,8 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
return pcpu_rt;
}
-static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
- struct flowi6 *fl6, int flags)
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt;
@@ -1139,6 +1139,7 @@ redo_rt6_select:
}
}
+EXPORT_SYMBOL_GPL(ip6_pol_route);
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
struct flowi6 *fl6, int flags)
@@ -1190,7 +1191,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct dst_entry *dst;
bool any_src;
- dst = l3mdev_rt6_dst_by_oif(net, fl6);
+ dst = l3mdev_get_rt6_dst(net, fl6);
if (dst)
return dst;
@@ -1771,6 +1772,37 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
return -EINVAL;
}
+static struct rt6_info *ip6_nh_lookup_table(struct net *net,
+ struct fib6_config *cfg,
+ const struct in6_addr *gw_addr)
+{
+ struct flowi6 fl6 = {
+ .flowi6_oif = cfg->fc_ifindex,
+ .daddr = *gw_addr,
+ .saddr = cfg->fc_prefsrc,
+ };
+ struct fib6_table *table;
+ struct rt6_info *rt;
+ int flags = RT6_LOOKUP_F_IFACE;
+
+ table = fib6_get_table(net, cfg->fc_table);
+ if (!table)
+ return NULL;
+
+ if (!ipv6_addr_any(&cfg->fc_prefsrc))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+
+ /* if table lookup failed, fall back to full lookup */
+ if (rt == net->ipv6.ip6_null_entry) {
+ ip6_rt_put(rt);
+ rt = NULL;
+ }
+
+ return rt;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1942,7 +1974,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- struct rt6_info *grt;
+ struct rt6_info *grt = NULL;
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
@@ -1954,7 +1986,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ if (cfg->fc_table) {
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr);
+
+ if (grt) {
+ if (grt->rt6i_flags & RTF_GATEWAY ||
+ (dev && dev != grt->dst.dev)) {
+ ip6_rt_put(grt);
+ grt = NULL;
+ }
+ }
+ }
+
+ if (!grt)
+ grt = rt6_lookup(net, gw_addr, NULL,
+ cfg->fc_ifindex, 1);
err = -EHOSTUNREACH;
if (!grt)
@@ -2164,7 +2210,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* first-hop router for the specified ICMP Destination Address.
*/
- if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
+ if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
return;
}
@@ -2199,12 +2245,12 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
* We have finally decided to accept it.
*/
- neigh_update(neigh, lladdr, NUD_STALE,
+ ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE|
(on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
- NEIGH_UPDATE_F_ISROUTER))
- );
+ NEIGH_UPDATE_F_ISROUTER)),
+ NDISC_REDIRECT, &ndopts);
nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
if (!nrt)
@@ -2549,23 +2595,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
return rt;
}
-int ip6_route_get_saddr(struct net *net,
- struct rt6_info *rt,
- const struct in6_addr *daddr,
- unsigned int prefs,
- struct in6_addr *saddr)
-{
- struct inet6_dev *idev =
- rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
- int err = 0;
- if (rt && rt->rt6i_prefsrc.plen)
- *saddr = rt->rt6i_prefsrc.addr;
- else
- err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
- daddr, prefs, saddr);
- return err;
-}
-
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
struct net_device *dev;
@@ -3270,6 +3299,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
err = -EINVAL;
memset(&fl6, 0, sizeof(fl6));
+ rtm = nlmsg_data(nlh);
+ fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
if (tb[RTA_SRC]) {
if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 83384308d032..182b6a9be29d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -479,47 +479,12 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
dev_put(dev);
}
-/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
- * if sufficient data bytes are available
- */
-static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb)
-{
- int ihl = ((const struct iphdr *)skb->data)->ihl*4;
- struct rt6_info *rt;
- struct sk_buff *skb2;
-
- if (!pskb_may_pull(skb, ihl + sizeof(struct ipv6hdr) + 8))
- return 1;
-
- skb2 = skb_clone(skb, GFP_ATOMIC);
-
- if (!skb2)
- return 1;
-
- skb_dst_drop(skb2);
- skb_pull(skb2, ihl);
- skb_reset_network_header(skb2);
-
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
-
- if (rt && rt->dst.dev)
- skb2->dev = rt->dst.dev;
-
- icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
-
- if (rt)
- ip6_rt_put(rt);
-
- kfree_skb(skb2);
-
- return 0;
-}
-
static int ipip6_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (const struct iphdr *)skb->data;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ unsigned int data_len = 0;
struct ip_tunnel *t;
int err;
@@ -544,6 +509,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return 0;
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
break;
@@ -560,22 +526,22 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, IPPROTO_IPV6, 0);
+ t->parms.link, 0, iph->protocol, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- IPPROTO_IPV6, 0);
+ iph->protocol, 0);
err = 0;
goto out;
}
- if (t->parms.iph.daddr == 0)
+ err = 0;
+ if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
goto out;
- err = 0;
- if (!ipip6_err_gen_icmpv6_unreach(skb))
+ if (t->parms.iph.daddr == 0)
goto out;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
@@ -722,12 +688,19 @@ out:
return 0;
}
-static const struct tnl_ptk_info tpi = {
+static const struct tnl_ptk_info ipip_tpi = {
/* no tunnel info required for ipip. */
.proto = htons(ETH_P_IP),
};
-static int ipip_rcv(struct sk_buff *skb)
+#if IS_ENABLED(CONFIG_MPLS)
+static const struct tnl_ptk_info mplsip_tpi = {
+ /* no tunnel info required for mplsip. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+#endif
+
+static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
const struct iphdr *iph;
struct ip_tunnel *tunnel;
@@ -736,15 +709,23 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel) {
- if (tunnel->parms.iph.protocol != IPPROTO_IPIP &&
+ const struct tnl_ptk_info *tpi;
+
+ if (tunnel->parms.iph.protocol != ipproto &&
tunnel->parms.iph.protocol != 0)
goto drop;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+#if IS_ENABLED(CONFIG_MPLS)
+ if (ipproto == IPPROTO_MPLS)
+ tpi = &mplsip_tpi;
+ else
+#endif
+ tpi = &ipip_tpi;
+ if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return 1;
@@ -754,6 +735,18 @@ drop:
return 0;
}
+static int ipip_rcv(struct sk_buff *skb)
+{
+ return sit_tunnel_rcv(skb, IPPROTO_IPIP);
+}
+
+#if IS_ENABLED(CONFIG_MPLS)
+static int mplsip_rcv(struct sk_buff *skb)
+{
+ return sit_tunnel_rcv(skb, IPPROTO_MPLS);
+}
+#endif
+
/*
* If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
* stores the embedded IPv4 address in v4dst and returns true.
@@ -825,9 +818,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
u8 protocol = IPPROTO_IPV6;
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- if (skb->protocol != htons(ETH_P_IPV6))
- goto tx_error;
-
if (tos == 1)
tos = ipv6_get_dsfield(iph6);
@@ -913,10 +903,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
- skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT);
- if (IS_ERR(skb)) {
+ if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) {
ip_rt_put(rt);
- goto out;
+ goto tx_error;
}
if (df) {
@@ -992,25 +981,25 @@ tx_error_icmp:
dst_link_failure(skb);
tx_error:
kfree_skb(skb);
-out:
dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
-static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
+ struct net_device *dev, u8 ipproto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tiph = &tunnel->parms.iph;
- skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
- if (IS_ERR(skb))
- goto out;
+ if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
+ goto tx_error;
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+ skb_set_inner_ipproto(skb, ipproto);
- ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
+ ip_tunnel_xmit(skb, dev, tiph, ipproto);
return NETDEV_TX_OK;
-out:
+tx_error:
+ kfree_skb(skb);
dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
@@ -1020,11 +1009,16 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
{
switch (skb->protocol) {
case htons(ETH_P_IP):
- ipip_tunnel_xmit(skb, dev);
+ sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
break;
case htons(ETH_P_IPV6):
ipip6_tunnel_xmit(skb, dev);
break;
+#if IS_ENABLED(CONFIG_MPLS)
+ case htons(ETH_P_MPLS_UC):
+ sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS);
+ break;
+#endif
default:
goto tx_err;
}
@@ -1132,6 +1126,16 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
#endif
+bool ipip6_valid_ip_proto(u8 ipproto)
+{
+ return ipproto == IPPROTO_IPV6 ||
+ ipproto == IPPROTO_IPIP ||
+#if IS_ENABLED(CONFIG_MPLS)
+ ipproto == IPPROTO_MPLS ||
+#endif
+ ipproto == 0;
+}
+
static int
ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
@@ -1191,9 +1195,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
goto done;
err = -EINVAL;
- if (p.iph.protocol != IPPROTO_IPV6 &&
- p.iph.protocol != IPPROTO_IPIP &&
- p.iph.protocol != 0)
+ if (!ipip6_valid_ip_proto(p.iph.protocol))
goto done;
if (p.iph.version != 4 ||
p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
@@ -1418,9 +1420,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
- if (proto != IPPROTO_IPV6 &&
- proto != IPPROTO_IPIP &&
- proto != 0)
+ if (!ipip6_valid_ip_proto(proto))
return -EINVAL;
return 0;
@@ -1762,6 +1762,14 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
.priority = 2,
};
+#if IS_ENABLED(CONFIG_MPLS)
+static struct xfrm_tunnel mplsip_handler __read_mostly = {
+ .handler = mplsip_rcv,
+ .err_handler = ipip6_err,
+ .priority = 2,
+};
+#endif
+
static void __net_exit sit_destroy_tunnels(struct net *net,
struct list_head *head)
{
@@ -1857,6 +1865,9 @@ static void __exit sit_cleanup(void)
rtnl_link_unregister(&sit_link_ops);
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+#endif
unregister_pernet_device(&sit_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
@@ -1866,7 +1877,7 @@ static int __init sit_init(void)
{
int err;
- pr_info("IPv6 over IPv4 tunneling driver\n");
+ pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n");
err = register_pernet_device(&sit_net_ops);
if (err < 0)
@@ -1881,6 +1892,13 @@ static int __init sit_init(void)
pr_info("%s: can't register ip4ip4\n", __func__);
goto xfrm_tunnel4_failed;
}
+#if IS_ENABLED(CONFIG_MPLS)
+ err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
+ if (err < 0) {
+ pr_info("%s: can't register mplsip\n", __func__);
+ goto xfrm_tunnel_mpls_failed;
+ }
+#endif
err = rtnl_link_register(&sit_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -1889,6 +1907,10 @@ out:
return err;
rtnl_link_failed:
+#if IS_ENABLED(CONFIG_MPLS)
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
+xfrm_tunnel_mpls_failed:
+#endif
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm_tunnel4_failed:
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index aab91fa86c5e..59c483937aec 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -155,11 +155,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie);
if (mss == 0) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 45243bbe5253..69c50e737c54 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -15,6 +15,9 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#ifdef CONFIG_NETLABEL
+#include <net/calipso.h>
+#endif
static int one = 1;
static int auto_flowlabels_min;
@@ -106,6 +109,22 @@ static struct ctl_table ipv6_rotable[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one
},
+#ifdef CONFIG_NETLABEL
+ {
+ .procname = "calipso_cache_enable",
+ .data = &calipso_cache_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "calipso_cache_bucket_size",
+ .data = &calipso_cache_bucketsize,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif /* CONFIG_NETLABEL */
{ }
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f443c6b0ce16..94f4f89d73e7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -234,7 +234,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -336,8 +336,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
skb->dev->ifindex);
if (!sk) {
- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
@@ -352,13 +352,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
- NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
goto out;
if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
- NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto out;
}
@@ -368,7 +368,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
- NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -439,10 +439,11 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- bool attach_req)
+ enum tcp_synack_type synack_type)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
int err = -ENOMEM;
@@ -452,7 +453,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
IPPROTO_TCP)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, foc, attach_req);
+ skb = tcp_make_synack(sk, dst, req, foc, synack_type);
if (skb) {
__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -463,8 +464,10 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
rcu_read_lock();
- err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
- np->tclass);
+ opt = ireq->ipv6_opt;
+ if (!opt)
+ opt = rcu_dereference(np->opt);
+ err = ip6_xmit(sk, skb, fl6, opt, np->tclass);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -476,6 +479,7 @@ done:
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
+ kfree(inet_rsk(req)->ipv6_opt);
kfree_skb(inet_rsk(req)->pktopts);
}
@@ -526,26 +530,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
-static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr, int nbytes)
+static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr,
+ const struct tcphdr *th, int nbytes)
{
struct tcp6_pseudohdr *bp;
struct scatterlist sg;
+ struct tcphdr *_th;
- bp = &hp->md5_blk.ip6;
+ bp = hp->scratch;
/* 1. TCP pseudo-header (RFC2460) */
bp->saddr = *saddr;
bp->daddr = *daddr;
bp->protocol = cpu_to_be32(IPPROTO_TCP);
bp->len = cpu_to_be32(nbytes);
- sg_init_one(&sg, bp, sizeof(*bp));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
+ _th = (struct tcphdr *)(bp + 1);
+ memcpy(_th, th, sizeof(*th));
+ _th->check = 0;
+
+ sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ sizeof(*bp) + sizeof(*th));
return crypto_ahash_update(hp->md5_req);
}
-static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
const struct in6_addr *daddr, struct in6_addr *saddr,
const struct tcphdr *th)
{
@@ -559,9 +570,7 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
if (crypto_ahash_init(req))
goto clear_hash;
- if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_header(hp, th))
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
@@ -606,9 +615,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
if (crypto_ahash_init(req))
goto clear_hash;
- if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_header(hp, th))
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
goto clear_hash;
if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
goto clear_hash;
@@ -649,12 +656,12 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
return false;
if (hash_expected && !hash_location) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
return true;
}
if (!hash_expected && hash_location) {
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
return true;
}
@@ -738,7 +745,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, u32 label)
+ u8 tclass, __be32 label)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -830,9 +837,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
- TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+ TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
- TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+ TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
return;
}
@@ -863,6 +870,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
return;
#ifdef CONFIG_TCP_MD5SIG
+ rcu_read_lock();
hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
@@ -880,16 +888,15 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
th->source, &ipv6h->daddr,
ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
- return;
+ goto out;
- rcu_read_lock();
key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
if (!key)
- goto release_sk1;
+ goto out;
genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0)
- goto release_sk1;
+ goto out;
}
#endif
@@ -903,18 +910,15 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG
-release_sk1:
- if (sk1) {
- rcu_read_unlock();
- sock_put(sk1);
- }
+out:
+ rcu_read_unlock();
#endif
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
- u32 label)
+ __be32 label)
{
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
tclass, label);
@@ -940,9 +944,15 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
+ /* RFC 7323 2.3
+ * The window field (SEG.WND) of every outgoing segment, with the
+ * exception of <SYN> segments, MUST be right-shifted by
+ * Rcv.Wind.Shift bits:
+ */
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
- tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+ tcp_rsk(req)->rcv_nxt,
+ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
@@ -972,7 +982,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
&tcp_request_sock_ipv6_ops, sk, skb);
drop:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ tcp_listendrop(sk);
return 0; /* don't send reset */
}
@@ -1112,7 +1122,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
but we make one more one thing there: reattach optmem
to newsk.
*/
- opt = rcu_dereference(np->opt);
+ opt = ireq->ipv6_opt;
+ if (!opt)
+ opt = rcu_dereference(np->opt);
if (opt) {
opt = ipv6_dup_options(newsk, opt);
RCU_INIT_POINTER(newnp->opt, opt);
@@ -1173,11 +1185,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
return newsk;
out_overflow:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
dst_release(dst);
out:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ tcp_listendrop(sk);
return NULL;
}
@@ -1284,8 +1296,8 @@ discard:
kfree_skb(skb);
return 0;
csum_err:
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1356,6 +1368,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
{
const struct tcphdr *th;
const struct ipv6hdr *hdr;
+ bool refcounted;
struct sock *sk;
int ret;
struct net *net = dev_net(skb->dev);
@@ -1366,14 +1379,14 @@ static int tcp_v6_rcv(struct sk_buff *skb)
/*
* Count it even if it's bad.
*/
- TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
+ __TCP_INC_STATS(net, TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
- th = tcp_hdr(skb);
+ th = (const struct tcphdr *)skb->data;
- if (th->doff < sizeof(struct tcphdr)/4)
+ if (unlikely(th->doff < sizeof(struct tcphdr)/4))
goto bad_packet;
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
@@ -1381,12 +1394,13 @@ static int tcp_v6_rcv(struct sk_buff *skb)
if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
goto csum_error;
- th = tcp_hdr(skb);
+ th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
lookup:
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
- th->source, th->dest, inet6_iif(skb));
+ th->source, th->dest, inet6_iif(skb),
+ &refcounted);
if (!sk)
goto no_tcp_socket;
@@ -1409,6 +1423,7 @@ process:
goto lookup;
}
sock_hold(sk);
+ refcounted = true;
nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
@@ -1426,7 +1441,7 @@ process:
}
}
if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
- NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
}
@@ -1459,13 +1474,14 @@ process:
} else if (unlikely(sk_add_backlog(sk, skb,
sk->sk_rcvbuf + sk->sk_sndbuf))) {
bh_unlock_sock(sk);
- NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
+ __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
goto discard_and_relse;
}
bh_unlock_sock(sk);
put_and_return:
- sock_put(sk);
+ if (refcounted)
+ sock_put(sk);
return ret ? -1 : 0;
no_tcp_socket:
@@ -1476,9 +1492,9 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
- TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
+ __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
- TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+ __TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
tcp_v6_send_reset(NULL, skb);
}
@@ -1488,7 +1504,9 @@ discard_it:
return 0;
discard_and_relse:
- sock_put(sk);
+ sk_drops_add(sk, skb);
+ if (refcounted)
+ sock_put(sk);
goto discard_it;
do_time_wait:
@@ -1519,6 +1537,7 @@ do_time_wait:
inet_twsk_deschedule_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
+ refcounted = false;
goto process;
}
/* Fall through to ACK */
@@ -1717,7 +1736,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6bc5c664fa46..19ac3a1c308d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk)
udp_lib_rehash(sk, new_hash);
}
-static inline int compute_score(struct sock *sk, struct net *net,
- unsigned short hnum,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif)
+static int compute_score(struct sock *sk, struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, unsigned short hnum,
+ int dif)
{
int score;
struct inet_sock *inet;
@@ -162,88 +161,36 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
-static inline int compute_score2(struct sock *sk, struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr,
- unsigned short hnum, int dif)
-{
- int score;
- struct inet_sock *inet;
-
- if (!net_eq(sock_net(sk), net) ||
- udp_sk(sk)->udp_port_hash != hnum ||
- sk->sk_family != PF_INET6)
- return -1;
-
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
- return -1;
-
- score = 0;
- inet = inet_sk(sk);
-
- if (inet->inet_dport) {
- if (inet->inet_dport != sport)
- return -1;
- score++;
- }
-
- if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
- return -1;
- score++;
- }
-
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- return -1;
- score++;
- }
-
- if (sk->sk_incoming_cpu == raw_smp_processor_id())
- score++;
-
- return score;
-}
-
-/* called with read_rcu_lock() */
+/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2,
+ struct udp_hslot *hslot2,
struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
- bool select_ok = true;
u32 hash = 0;
-begin:
result = NULL;
badness = -1;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
- score = compute_score2(sk, net, saddr, sport,
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
+ score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- if (select_ok) {
- struct sock *sk2;
- sk2 = reuseport_select_sock(sk, hash, skb,
+ result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- select_ok = false;
- goto found;
- }
- }
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -251,27 +198,10 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot2)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score2(result, net, saddr, sport,
- daddr, hnum, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
return result;
}
+/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
@@ -279,15 +209,12 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
- bool select_ok = true;
u32 hash = 0;
- rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -297,46 +224,43 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2, skb);
+ hslot2, skb);
if (!result) {
+ unsigned int old_slot2 = slot2;
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
+ /* avoid searching the same slot again. */
+ if (unlikely(slot2 == old_slot2))
+ return result;
+
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
- &in6addr_any, hnum, dif,
- hslot2, slot2, skb);
+ daddr, hnum, dif,
+ hslot2, skb);
}
- rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = -1;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
- score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
+ sk_for_each_rcu(sk, &hslot->head) {
+ score = compute_score(sk, net, saddr, sport, daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- if (select_ok) {
- struct sock *sk2;
-
- sk2 = reuseport_select_sock(sk, hash, skb,
+ result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- select_ok = false;
- goto found;
- }
- }
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -344,25 +268,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score(result, net, hnum, saddr, sport,
- daddr, dport, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
@@ -371,23 +276,46 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
struct udp_table *udptable)
{
- struct sock *sk;
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sock *sk;
sk = skb_steal_sock(skb);
if (unlikely(sk))
return sk;
- return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
+ return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
udptable, skb);
}
+struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+ __be16 sport, __be16 dport)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+ &iph->daddr, dport, inet6_iif(skb),
+ &udp_table, skb);
+}
+EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
+
+/* Must be called under rcu_read_lock().
+ * Does increment socket refcount.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
- return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL);
+ struct sock *sk;
+
+ sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ return sk;
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
+#endif
/*
* This should be easy, if there is something there we
@@ -401,7 +329,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
unsigned int ulen, copied;
- int peeked, off = 0;
+ int peeked, peeking, off;
int err;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
@@ -415,15 +343,16 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
+ peeking = off = sk_peek_offset(sk, flags);
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &off, &err);
if (!skb)
- goto out;
+ return err;
- ulen = skb->len - sizeof(struct udphdr);
+ ulen = skb->len;
copied = len;
- if (copied > ulen)
- copied = ulen;
+ if (copied > ulen - off)
+ copied = ulen - off;
else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
@@ -435,17 +364,16 @@ try_again:
* coverage checksum (UDP-Lite), do it before the copy.
*/
- if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
checksum_valid = !udp_lib_checksum_complete(skb);
if (!checksum_valid)
goto csum_copy_err;
}
if (checksum_valid || skb_csum_unnecessary(skb))
- err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
- msg, copied);
+ err = skb_copy_datagram_msg(skb, off, msg, copied);
else {
- err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg);
+ err = skb_copy_and_csum_datagram_msg(skb, off, msg);
if (err == -EINVAL)
goto csum_copy_err;
}
@@ -454,23 +382,22 @@ try_again:
if (!peeked) {
atomic_inc(&sk->sk_drops);
if (is_udp4)
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS,
- is_udplite);
+ UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+ is_udplite);
else
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS,
- is_udplite);
+ UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+ is_udplite);
}
- goto out_free;
+ skb_free_datagram_locked(sk, skb);
+ return err;
}
if (!peeked) {
if (is_udp4)
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INDATAGRAMS, is_udplite);
+ UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
+ is_udplite);
else
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INDATAGRAMS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
+ is_udplite);
}
sock_recv_ts_and_drops(msg, sk, skb);
@@ -510,24 +437,22 @@ try_again:
if (flags & MSG_TRUNC)
err = ulen;
-out_free:
- skb_free_datagram_locked(sk, skb);
-out:
+ __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
return err;
csum_copy_err:
slow = lock_sock_fast(sk);
if (!skb_kill_datagram(sk, skb, flags)) {
if (is_udp4) {
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_CSUMERRORS, is_udplite);
- UDP_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_CSUMERRORS, is_udplite);
+ UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
} else {
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_CSUMERRORS, is_udplite);
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_CSUMERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
}
}
unlock_sock_fast(sk, slow);
@@ -555,8 +480,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), udptable, skb);
if (!sk) {
- ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
@@ -585,7 +510,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -598,15 +523,15 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_incoming_cpu_update(sk);
}
- rc = sock_queue_rcv_skb(sk, skb);
+ rc = __sock_queue_rcv_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, is_udplite);
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
@@ -653,7 +578,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* if we're overly short, let UDP handle it */
encap_rcv = ACCESS_ONCE(up->encap_rcv);
- if (skb->len > sizeof(struct udphdr) && encap_rcv) {
+ if (encap_rcv) {
int ret;
/* Verify checksum before giving to encap */
@@ -662,9 +587,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
ret = encap_rcv(sk, skb);
if (ret <= 0) {
- UDP_INC_STATS_BH(sock_net(sk),
- UDP_MIB_INDATAGRAMS,
- is_udplite);
+ __UDP_INC_STATS(sock_net(sk),
+ UDP_MIB_INDATAGRAMS,
+ is_udplite);
return -ret;
}
}
@@ -689,14 +614,17 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (rcu_access_pointer(sk->sk_filter)) {
- if (udp_lib_checksum_complete(skb))
- goto csum_error;
- }
+ if (rcu_access_pointer(sk->sk_filter) &&
+ udp_lib_checksum_complete(skb))
+ goto csum_error;
+
+ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
+ goto drop;
+ udp_csum_pull_header(skb);
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
- UDP6_INC_STATS_BH(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, is_udplite);
+ __UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
}
@@ -715,9 +643,9 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return rc;
csum_error:
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
+ __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
return -1;
@@ -747,33 +675,6 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
-static void flush_stack(struct sock **stack, unsigned int count,
- struct sk_buff *skb, unsigned int final)
-{
- struct sk_buff *skb1 = NULL;
- struct sock *sk;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- sk = stack[i];
- if (likely(!skb1))
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
- if (!skb1) {
- atomic_inc(&sk->sk_drops);
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- }
-
- if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
- skb1 = NULL;
- sock_put(sk);
- }
- if (unlikely(skb1))
- kfree_skb(skb1);
-}
-
static void udp6_csum_zero_error(struct sk_buff *skb)
{
/* RFC 2460 section 8.1 says that we SHOULD log
@@ -792,15 +693,15 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
const struct in6_addr *saddr, const struct in6_addr *daddr,
struct udp_table *udptable, int proto)
{
- struct sock *sk, *stack[256 / sizeof(struct sock *)];
+ struct sock *sk, *first = NULL;
const struct udphdr *uh = udp_hdr(skb);
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(uh->dest);
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
- int dif = inet6_iif(skb);
- unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int offset = offsetof(typeof(*sk), sk_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- bool inner_flushed = false;
+ int dif = inet6_iif(skb);
+ struct hlist_node *node;
+ struct sk_buff *nskb;
if (use_hash2) {
hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
@@ -811,27 +712,32 @@ start_lookup:
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
- spin_lock(&hslot->lock);
- sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
- if (__udp_v6_is_mcast_sock(net, sk,
- uh->dest, daddr,
- uh->source, saddr,
- dif, hnum) &&
- /* If zero checksum and no_check is not on for
- * the socket then skip it.
- */
- (uh->check || udp_sk(sk)->no_check6_rx)) {
- if (unlikely(count == ARRAY_SIZE(stack))) {
- flush_stack(stack, count, skb, ~0);
- inner_flushed = true;
- count = 0;
- }
- stack[count++] = sk;
- sock_hold(sk);
+ sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
+ if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
+ uh->source, saddr, dif, hnum))
+ continue;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ continue;
+ if (!first) {
+ first = sk;
+ continue;
+ }
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic_inc(&sk->sk_drops);
+ __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ __UDP6_INC_STATS(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ continue;
}
- }
- spin_unlock(&hslot->lock);
+ if (udpv6_queue_rcv_skb(sk, nskb) > 0)
+ consume_skb(nskb);
+ }
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
if (use_hash2 && hash2 != hash2_any) {
@@ -839,13 +745,13 @@ start_lookup:
goto start_lookup;
}
- if (count) {
- flush_stack(stack, count, skb, count - 1);
+ if (first) {
+ if (udpv6_queue_rcv_skb(first, skb) > 0)
+ consume_skb(skb);
} else {
- if (!inner_flushed)
- UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
- consume_skb(skb);
+ kfree_skb(skb);
+ __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -853,10 +759,10 @@ start_lookup:
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
+ const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
- struct sock *sk;
struct udphdr *uh;
- const struct in6_addr *saddr, *daddr;
+ struct sock *sk;
u32 ulen = 0;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -910,7 +816,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int ret;
if (!uh->check && !udp_sk(sk)->no_check6_rx) {
- sock_put(sk);
udp6_csum_zero_error(skb);
goto csum_error;
}
@@ -920,7 +825,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ip6_compute_pseudo);
ret = udpv6_queue_rcv_skb(sk, skb);
- sock_put(sk);
/* a return value > 0 means to resubmit the input */
if (ret > 0)
@@ -940,7 +844,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp_lib_checksum_complete(skb))
goto csum_error;
- UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
kfree_skb(skb);
@@ -954,9 +858,9 @@ short_packet:
daddr, ntohs(uh->dest));
goto discard;
csum_error:
- UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
+ __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
- UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+ __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
@@ -1068,13 +972,14 @@ send:
err = ip6_send_skb(skb);
if (err) {
if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_SNDBUFERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
}
- } else
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_OUTDATAGRAMS, is_udplite);
+ } else {
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_OUTDATAGRAMS, is_udplite);
+ }
return err;
}
@@ -1118,16 +1023,19 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
+ struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int ulen = len;
- int hlimit = -1;
- int tclass = -1;
- int dontfrag = -1;
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int err;
int connected = 0;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
+ struct sockcm_cookie sockc;
+
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
/* destination address check */
if (sin6) {
@@ -1247,14 +1155,15 @@ do_udp_sendmsg:
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
fl6.flowi6_mark = sk->sk_mark;
+ sockc.tsflags = sk->sk_tsflags;
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
+ ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1275,6 +1184,7 @@ do_udp_sendmsg:
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
+ ipc6.opt = opt;
fl6.flowi6_proto = sk->sk_protocol;
if (!ipv6_addr_any(daddr))
@@ -1297,6 +1207,11 @@ do_udp_sendmsg:
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
+
+ fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
+
dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
@@ -1304,11 +1219,8 @@ do_udp_sendmsg:
goto out;
}
- if (hlimit < 0)
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-
- if (tclass < 0)
- tclass = np->tclass;
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
@@ -1319,9 +1231,9 @@ back_from_confirm:
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
- sizeof(struct udphdr), hlimit, tclass, opt,
+ sizeof(struct udphdr), &ipc6,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, &sockc);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
@@ -1342,13 +1254,12 @@ back_from_confirm:
up->pending = AF_INET6;
do_append_data:
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
+ if (ipc6.dontfrag < 0)
+ ipc6.dontfrag = np->dontfrag;
up->len += ulen;
- err = ip6_append_data(sk, getfrag, msg, ulen,
- sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
- (struct rt6_info *)dst,
- corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
+ err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
@@ -1391,8 +1302,8 @@ out:
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_SNDBUFERRORS, is_udplite);
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
@@ -1549,7 +1460,6 @@ struct proto udpv6_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 2b0fbe6929e8..ac858c480f2f 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -36,19 +36,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
- int type = skb_shinfo(skb)->gso_type;
-
- if (unlikely(type & ~(SKB_GSO_UDP |
- SKB_GSO_DODGY |
- SKB_GSO_UDP_TUNNEL |
- SKB_GSO_UDP_TUNNEL_CSUM |
- SKB_GSO_TUNNEL_REMCSUM |
- SKB_GSO_GRE |
- SKB_GSO_GRE_CSUM |
- SKB_GSO_IPIP |
- SKB_GSO_SIT) ||
- !(type & (SKB_GSO_UDP))))
- goto out;
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
@@ -153,7 +140,7 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 1;
- return udp_gro_receive(head, skb, uh);
+ return udp_gro_receive(head, skb, uh, udp6_lib_lookup_skb);
flush:
NAPI_GRO_CB(skb)->flush = 1;
@@ -173,7 +160,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
}
- return udp_gro_complete(skb, nhoff);
+ return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
static const struct net_offload udpv6_offload = {
@@ -184,7 +171,12 @@ static const struct net_offload udpv6_offload = {
},
};
-int __init udp_offload_init(void)
+int udpv6_offload_init(void)
{
return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
}
+
+int udpv6_offload_exit(void)
+{
+ return inet6_del_offload(&udpv6_offload, IPPROTO_UDP);
+}
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 9cf097e206e9..fd6ef414899b 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -50,7 +50,6 @@ struct proto udplitev6_prot = {
.unhash = udp_lib_unhash,
.get_port = udp_v6_get_port,
.obj_size = sizeof(struct udp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0eaab1fa6be5..b5789562aded 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -21,8 +21,10 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
return xfrm6_extract_header(skb);
}
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+ struct ip6_tnl *t)
{
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
return xfrm_input(skb, nexthdr, spi, 0);
@@ -48,13 +50,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return -1;
}
-int xfrm6_rcv(struct sk_buff *skb)
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
{
return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
- 0);
+ 0, t);
}
-EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_rcv_tnl);
+int xfrm6_rcv(struct sk_buff *skb)
+{
+ return xfrm6_rcv_tnl(skb, NULL);
+}
+EXPORT_SYMBOL(xfrm6_rcv);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto)
{
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index c074771a10f7..70a86adad875 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -36,7 +36,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
int err;
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = oif;
+ fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
@@ -366,12 +366,12 @@ static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
kfree(table);
}
#else /* CONFIG_SYSCTL */
-static int inline xfrm6_net_sysctl_init(struct net *net)
+static inline int xfrm6_net_sysctl_init(struct net *net)
{
return 0;
}
-static void inline xfrm6_net_sysctl_exit(struct net *net)
+static inline void xfrm6_net_sysctl_exit(struct net *net)
{
}
#endif
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 5743044cd660..e1c0bbe7996c 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -236,7 +236,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
__be32 spi;
spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
- return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
+ return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
}
static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,