summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/atm/lec.c4
-rw-r--r--net/batman-adv/bat_v.c12
-rw-r--r--net/batman-adv/distributed-arp-table.c17
-rw-r--r--net/batman-adv/hard-interface.c6
-rw-r--r--net/batman-adv/originator.c17
-rw-r--r--net/batman-adv/routing.c9
-rw-r--r--net/batman-adv/send.c6
-rw-r--r--net/batman-adv/soft-interface.c10
-rw-r--r--net/batman-adv/translation-table.c42
-rw-r--r--net/batman-adv/types.h7
-rw-r--r--net/bluetooth/bnep/netdev.c2
-rw-r--r--net/ceph/auth.c8
-rw-r--r--net/ceph/auth_none.c71
-rw-r--r--net/ceph/auth_none.h3
-rw-r--r--net/ceph/auth_x.c21
-rw-r--r--net/ceph/auth_x.h1
-rw-r--r--net/ceph/osd_client.c6
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/skbuff.c11
-rw-r--r--net/core/sock.c24
-rw-r--r--net/ipv4/gre_demux.c11
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/ip_gre.c76
-rw-r--r--net/ipv4/ip_tunnel.c4
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/datagram.c13
-rw-r--r--net/ipv6/icmp.c28
-rw-r--r--net/ipv6/ila/ila_lwt.c3
-rw-r--r--net/ipv6/ip6_flowlabel.c6
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ip6_output.c42
-rw-r--r--net/ipv6/ipv6_sockglue.c6
-rw-r--r--net/ipv6/ping.c12
-rw-r--r--net/ipv6/raw.c33
-rw-r--r--net/ipv6/udp.c38
-rw-r--r--net/irda/irlan/irlan_eth.c2
-rw-r--r--net/l2tp/l2tp_core.c4
-rw-r--r--net/l2tp/l2tp_ip6.c33
-rw-r--r--net/mac80211/iface.c4
-rw-r--r--net/rds/tcp.c3
-rw-r--r--net/rds/tcp.h4
-rw-r--r--net/rds/tcp_connect.c8
-rw-r--r--net/rds/tcp_listen.c54
-rw-r--r--net/sched/sch_generic.c12
-rw-r--r--net/sched/sch_netem.c61
-rw-r--r--net/tipc/core.c8
-rw-r--r--net/tipc/msg.h14
-rw-r--r--net/tipc/node.c26
-rw-r--r--net/tipc/node.h6
-rw-r--r--net/tipc/socket.c144
-rw-r--r--net/tipc/socket.h17
53 files changed, 606 insertions, 364 deletions
diff --git a/net/atm/lec.c b/net/atm/lec.c
index cd3b37989057..e574a7e9db6f 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -194,7 +194,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
static void lec_tx_timeout(struct net_device *dev)
{
pr_info("%s\n", dev->name);
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
netif_wake_queue(dev);
}
@@ -324,7 +324,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb,
out:
if (entry)
lec_arp_put(entry);
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
return NETDEV_TX_OK;
}
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 246f9e959849..3ff8bd1b7bdc 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -32,10 +32,21 @@
#include "bat_v_elp.h"
#include "bat_v_ogm.h"
+#include "hard-interface.h"
#include "hash.h"
#include "originator.h"
#include "packet.h"
+static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
+{
+ /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can
+ * set the interface as ACTIVE right away, without any risk of race
+ * condition
+ */
+ if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED)
+ hard_iface->if_status = BATADV_IF_ACTIVE;
+}
+
static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface)
{
int ret;
@@ -273,6 +284,7 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
static struct batadv_algo_ops batadv_batman_v __read_mostly = {
.name = "BATMAN_V",
+ .bat_iface_activate = batadv_v_iface_activate,
.bat_iface_enable = batadv_v_iface_enable,
.bat_iface_disable = batadv_v_iface_disable,
.bat_iface_update_mac = batadv_v_iface_update_mac,
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index f0548b4f66f4..67f44f5d630b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -568,6 +568,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
* be sent to
* @bat_priv: the bat priv with all the soft interface information
* @ip_dst: ipv4 to look up in the DHT
+ * @vid: VLAN identifier
*
* An originator O is selected if and only if its DHT_ID value is one of three
* closest values (from the LEFT, with wrap around if needed) then the hash
@@ -576,7 +577,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
* Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM.
*/
static struct batadv_dat_candidate *
-batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
+batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
+ unsigned short vid)
{
int select;
batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
@@ -592,7 +594,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
return NULL;
dat.ip = ip_dst;
- dat.vid = 0;
+ dat.vid = vid;
ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
BATADV_DAT_ADDR_MAX);
@@ -612,6 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
* @bat_priv: the bat priv with all the soft interface information
* @skb: payload to send
* @ip: the DHT key
+ * @vid: VLAN identifier
* @packet_subtype: unicast4addr packet subtype to use
*
* This function copies the skb with pskb_copy() and is sent as unicast packet
@@ -622,7 +625,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
*/
static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
struct sk_buff *skb, __be32 ip,
- int packet_subtype)
+ unsigned short vid, int packet_subtype)
{
int i;
bool ret = false;
@@ -631,7 +634,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
struct sk_buff *tmp_skb;
struct batadv_dat_candidate *cand;
- cand = batadv_dat_select_candidates(bat_priv, ip);
+ cand = batadv_dat_select_candidates(bat_priv, ip, vid);
if (!cand)
goto out;
@@ -1022,7 +1025,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
ret = true;
} else {
/* Send the request to the DHT */
- ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
+ ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
BATADV_P_DAT_DHT_GET);
}
out:
@@ -1150,8 +1153,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
/* Send the ARP reply to the candidates for both the IP addresses that
* the node obtained from the ARP reply
*/
- batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
- batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
+ batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
+ batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
}
/**
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index b22b2775a0a5..0a7deaf2670a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -407,6 +407,9 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
batadv_update_min_mtu(hard_iface->soft_iface);
+ if (bat_priv->bat_algo_ops->bat_iface_activate)
+ bat_priv->bat_algo_ops->bat_iface_activate(hard_iface);
+
out:
if (primary_if)
batadv_hardif_put(primary_if);
@@ -572,8 +575,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_hard_iface *primary_if = NULL;
- if (hard_iface->if_status == BATADV_IF_ACTIVE)
- batadv_hardif_deactivate_interface(hard_iface);
+ batadv_hardif_deactivate_interface(hard_iface);
if (hard_iface->if_status != BATADV_IF_INACTIVE)
goto out;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 155c1dd36c17..f885a41d06d5 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -250,7 +250,6 @@ static void batadv_neigh_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
- struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_neigh_ifinfo *neigh_ifinfo;
struct batadv_algo_ops *bao;
@@ -262,13 +261,7 @@ static void batadv_neigh_node_release(struct kref *ref)
batadv_neigh_ifinfo_put(neigh_ifinfo);
}
- hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming,
- neigh_node->addr);
- if (hardif_neigh) {
- /* batadv_hardif_neigh_get() increases refcount too */
- batadv_hardif_neigh_put(hardif_neigh);
- batadv_hardif_neigh_put(hardif_neigh);
- }
+ batadv_hardif_neigh_put(neigh_node->hardif_neigh);
if (bao->bat_neigh_free)
bao->bat_neigh_free(neigh_node);
@@ -663,6 +656,11 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
ether_addr_copy(neigh_node->addr, neigh_addr);
neigh_node->if_incoming = hard_iface;
neigh_node->orig_node = orig_node;
+ neigh_node->last_seen = jiffies;
+
+ /* increment unique neighbor refcount */
+ kref_get(&hardif_neigh->refcount);
+ neigh_node->hardif_neigh = hardif_neigh;
/* extra reference for return */
kref_init(&neigh_node->refcount);
@@ -672,9 +670,6 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
spin_unlock_bh(&orig_node->neigh_list_lock);
- /* increment unique neighbor refcount */
- kref_get(&hardif_neigh->refcount);
-
batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
"Creating new neighbor %pM for orig_node %pM on interface %s\n",
neigh_addr, orig_node->orig, hard_iface->net_dev->name);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 4dd646a52f1a..b781bf753250 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -105,6 +105,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
neigh_node = NULL;
spin_lock_bh(&orig_node->neigh_list_lock);
+ /* curr_router used earlier may not be the current orig_ifinfo->router
+ * anymore because it was dereferenced outside of the neigh_list_lock
+ * protected region. After the new best neighbor has replace the current
+ * best neighbor the reference counter needs to decrease. Consequently,
+ * the code needs to ensure the curr_router variable contains a pointer
+ * to the replaced best neighbor.
+ */
+ curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
rcu_assign_pointer(orig_ifinfo->router, neigh_node);
spin_unlock_bh(&orig_node->neigh_list_lock);
batadv_orig_ifinfo_put(orig_ifinfo);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 20076b4c5e1d..99ea9001cf8a 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -675,6 +675,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->bcast_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
@@ -702,6 +705,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->batman_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d78c560852d7..dfb4d56120b6 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -208,7 +208,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
- soft_iface->trans_start = jiffies;
+ netif_trans_update(soft_iface);
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
@@ -426,11 +426,17 @@ void batadv_interface_rx(struct net_device *soft_iface,
*/
nf_reset(skb);
+ if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+ goto dropped;
+
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
+ if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
+ goto dropped;
+
vhdr = (struct vlan_ethhdr *)skb->data;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
@@ -442,8 +448,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
}
/* skb->dev & skb->pkt_type are set here */
- if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
- goto dropped;
skb->protocol = eth_type_trans(skb, soft_iface);
/* should not be necessary anymore as we use skb_pull_rcsum()
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index d44ce84626c5..942b3aa00bed 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -215,6 +215,8 @@ static void batadv_tt_local_entry_release(struct kref *ref)
tt_local_entry = container_of(ref, struct batadv_tt_local_entry,
common.refcount);
+ batadv_softif_vlan_put(tt_local_entry->vlan);
+
kfree_rcu(tt_local_entry, common.rcu);
}
@@ -673,6 +675,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
kref_get(&tt_local->common.refcount);
tt_local->last_seen = jiffies;
tt_local->common.added_at = tt_local->last_seen;
+ tt_local->vlan = vlan;
/* the batman interface mac and multicast addresses should never be
* purged
@@ -991,7 +994,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
struct batadv_hard_iface *primary_if;
- struct batadv_softif_vlan *vlan;
struct hlist_head *head;
unsigned short vid;
u32 i;
@@ -1027,14 +1029,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
last_seen_msecs = last_seen_msecs % 1000;
no_purge = tt_common_entry->flags & np_flag;
-
- vlan = batadv_softif_vlan_get(bat_priv, vid);
- if (!vlan) {
- seq_printf(seq, "Cannot retrieve VLAN %d\n",
- BATADV_PRINT_VID(vid));
- continue;
- }
-
seq_printf(seq,
" * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
tt_common_entry->addr,
@@ -1052,9 +1046,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
no_purge ? 0 : last_seen_secs,
no_purge ? 0 : last_seen_msecs,
- vlan->tt.crc);
-
- batadv_softif_vlan_put(vlan);
+ tt_local->vlan->tt.crc);
}
rcu_read_unlock();
}
@@ -1099,7 +1091,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
{
struct batadv_tt_local_entry *tt_local_entry;
u16 flags, curr_flags = BATADV_NO_FLAGS;
- struct batadv_softif_vlan *vlan;
void *tt_entry_exists;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
@@ -1139,14 +1130,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
/* extra call to free the local tt entry */
batadv_tt_local_entry_put(tt_local_entry);
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv, vid);
- if (!vlan)
- goto out;
-
- batadv_softif_vlan_put(vlan);
- batadv_softif_vlan_put(vlan);
-
out:
if (tt_local_entry)
batadv_tt_local_entry_put(tt_local_entry);
@@ -1219,7 +1202,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
spinlock_t *list_lock; /* protects write access to the hash lists */
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local;
- struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
u32 i;
@@ -1241,14 +1223,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
struct batadv_tt_local_entry,
common);
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv,
- tt_common_entry->vid);
- if (vlan) {
- batadv_softif_vlan_put(vlan);
- batadv_softif_vlan_put(vlan);
- }
-
batadv_tt_local_entry_put(tt_local);
}
spin_unlock_bh(list_lock);
@@ -3308,7 +3282,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_local_entry *tt_local;
- struct batadv_softif_vlan *vlan;
struct hlist_node *node_tmp;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -3338,13 +3311,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
struct batadv_tt_local_entry,
common);
- /* decrease the reference held for this vlan */
- vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
- if (vlan) {
- batadv_softif_vlan_put(vlan);
- batadv_softif_vlan_put(vlan);
- }
-
batadv_tt_local_entry_put(tt_local);
}
spin_unlock_bh(list_lock);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 9abfb3e73c34..1e47fbe8bb7b 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -433,6 +433,7 @@ struct batadv_hardif_neigh_node {
* @ifinfo_lock: lock protecting private ifinfo members and list
* @if_incoming: pointer to incoming hard-interface
* @last_seen: when last packet via this neighbor was received
+ * @hardif_neigh: hardif_neigh of this neighbor
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
*/
@@ -444,6 +445,7 @@ struct batadv_neigh_node {
spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */
struct batadv_hard_iface *if_incoming;
unsigned long last_seen;
+ struct batadv_hardif_neigh_node *hardif_neigh;
struct kref refcount;
struct rcu_head rcu;
};
@@ -1073,10 +1075,12 @@ struct batadv_tt_common_entry {
* struct batadv_tt_local_entry - translation table local entry data
* @common: general translation table data
* @last_seen: timestamp used for purging stale tt local entries
+ * @vlan: soft-interface vlan of the entry
*/
struct batadv_tt_local_entry {
struct batadv_tt_common_entry common;
unsigned long last_seen;
+ struct batadv_softif_vlan *vlan;
};
/**
@@ -1250,6 +1254,8 @@ struct batadv_forw_packet {
* struct batadv_algo_ops - mesh algorithm callbacks
* @list: list node for the batadv_algo_list
* @name: name of the algorithm
+ * @bat_iface_activate: start routing mechanisms when hard-interface is brought
+ * up
* @bat_iface_enable: init routing info when hard-interface is enabled
* @bat_iface_disable: de-init routing info when hard-interface is disabled
* @bat_iface_update_mac: (re-)init mac addresses of the protocol information
@@ -1277,6 +1283,7 @@ struct batadv_forw_packet {
struct batadv_algo_ops {
struct hlist_node list;
char *name;
+ void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface);
int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface);
void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface);
void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface);
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 6ceb5d36a32b..f4fcb4a9d5c1 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -188,7 +188,7 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb,
* So we have to queue them and wake up session thread which is sleeping
* on the sk_sleep(sk).
*/
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
skb_queue_tail(&sk->sk_write_queue, skb);
wake_up_interruptible(sk_sleep(sk));
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 6b923bcaa2a4..2bc5965fdd1e 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -293,13 +293,9 @@ int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
}
EXPORT_SYMBOL(ceph_auth_create_authorizer);
-void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
- struct ceph_authorizer *a)
+void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
{
- mutex_lock(&ac->mutex);
- if (ac->ops && ac->ops->destroy_authorizer)
- ac->ops->destroy_authorizer(ac, a);
- mutex_unlock(&ac->mutex);
+ a->destroy(a);
}
EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 8c93fa8d81bc..5f836f02ae36 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -16,7 +16,6 @@ static void reset(struct ceph_auth_client *ac)
struct ceph_auth_none_info *xi = ac->private;
xi->starting = true;
- xi->built_authorizer = false;
}
static void destroy(struct ceph_auth_client *ac)
@@ -39,6 +38,27 @@ static int should_authenticate(struct ceph_auth_client *ac)
return xi->starting;
}
+static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac,
+ struct ceph_none_authorizer *au)
+{
+ void *p = au->buf;
+ void *const end = p + sizeof(au->buf);
+ int ret;
+
+ ceph_encode_8_safe(&p, end, 1, e_range);
+ ret = ceph_entity_name_encode(ac->name, &p, end);
+ if (ret < 0)
+ return ret;
+
+ ceph_encode_64_safe(&p, end, ac->global_id, e_range);
+ au->buf_len = p - (void *)au->buf;
+ dout("%s built authorizer len %d\n", __func__, au->buf_len);
+ return 0;
+
+e_range:
+ return -ERANGE;
+}
+
static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
{
return 0;
@@ -57,32 +77,32 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
return result;
}
+static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a)
+{
+ kfree(a);
+}
+
/*
- * build an 'authorizer' with our entity_name and global_id. we can
- * reuse a single static copy since it is identical for all services
- * we connect to.
+ * build an 'authorizer' with our entity_name and global_id. it is
+ * identical for all services we connect to.
*/
static int ceph_auth_none_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
struct ceph_auth_handshake *auth)
{
- struct ceph_auth_none_info *ai = ac->private;
- struct ceph_none_authorizer *au = &ai->au;
- void *p, *end;
+ struct ceph_none_authorizer *au;
int ret;
- if (!ai->built_authorizer) {
- p = au->buf;
- end = p + sizeof(au->buf);
- ceph_encode_8(&p, 1);
- ret = ceph_entity_name_encode(ac->name, &p, end - 8);
- if (ret < 0)
- goto bad;
- ceph_decode_need(&p, end, sizeof(u64), bad2);
- ceph_encode_64(&p, ac->global_id);
- au->buf_len = p - (void *)au->buf;
- ai->built_authorizer = true;
- dout("built authorizer len %d\n", au->buf_len);
+ au = kmalloc(sizeof(*au), GFP_NOFS);
+ if (!au)
+ return -ENOMEM;
+
+ au->base.destroy = ceph_auth_none_destroy_authorizer;
+
+ ret = ceph_auth_none_build_authorizer(ac, au);
+ if (ret) {
+ kfree(au);
+ return ret;
}
auth->authorizer = (struct ceph_authorizer *) au;
@@ -92,17 +112,6 @@ static int ceph_auth_none_create_authorizer(
auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
return 0;
-
-bad2:
- ret = -ERANGE;
-bad:
- return ret;
-}
-
-static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
- struct ceph_authorizer *a)
-{
- /* nothing to do */
}
static const struct ceph_auth_client_ops ceph_auth_none_ops = {
@@ -114,7 +123,6 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.build_request = build_request,
.handle_reply = handle_reply,
.create_authorizer = ceph_auth_none_create_authorizer,
- .destroy_authorizer = ceph_auth_none_destroy_authorizer,
};
int ceph_auth_none_init(struct ceph_auth_client *ac)
@@ -127,7 +135,6 @@ int ceph_auth_none_init(struct ceph_auth_client *ac)
return -ENOMEM;
xi->starting = true;
- xi->built_authorizer = false;
ac->protocol = CEPH_AUTH_NONE;
ac->private = xi;
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 059a3ce4b53f..62021535ae4a 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -12,6 +12,7 @@
*/
struct ceph_none_authorizer {
+ struct ceph_authorizer base;
char buf[128];
int buf_len;
char reply_buf[0];
@@ -19,8 +20,6 @@ struct ceph_none_authorizer {
struct ceph_auth_none_info {
bool starting;
- bool built_authorizer;
- struct ceph_none_authorizer au; /* we only need one; it's static */
};
int ceph_auth_none_init(struct ceph_auth_client *ac);
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 9e43a315e662..a0905f04bd13 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -565,6 +565,14 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
+static void ceph_x_destroy_authorizer(struct ceph_authorizer *a)
+{
+ struct ceph_x_authorizer *au = (void *)a;
+
+ ceph_x_authorizer_cleanup(au);
+ kfree(au);
+}
+
static int ceph_x_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
struct ceph_auth_handshake *auth)
@@ -581,6 +589,8 @@ static int ceph_x_create_authorizer(
if (!au)
return -ENOMEM;
+ au->base.destroy = ceph_x_destroy_authorizer;
+
ret = ceph_x_build_authorizer(ac, th, au);
if (ret) {
kfree(au);
@@ -643,16 +653,6 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
return ret;
}
-static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
- struct ceph_authorizer *a)
-{
- struct ceph_x_authorizer *au = (void *)a;
-
- ceph_x_authorizer_cleanup(au);
- kfree(au);
-}
-
-
static void ceph_x_reset(struct ceph_auth_client *ac)
{
struct ceph_x_info *xi = ac->private;
@@ -770,7 +770,6 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
.create_authorizer = ceph_x_create_authorizer,
.update_authorizer = ceph_x_update_authorizer,
.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
- .destroy_authorizer = ceph_x_destroy_authorizer,
.invalidate_authorizer = ceph_x_invalidate_authorizer,
.reset = ceph_x_reset,
.destroy = ceph_x_destroy,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 40b1a3cf7397..21a5af904bae 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -26,6 +26,7 @@ struct ceph_x_ticket_handler {
struct ceph_x_authorizer {
+ struct ceph_authorizer base;
struct ceph_crypto_key session_key;
struct ceph_buffer *buf;
unsigned int service;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 32355d9d0103..40a53a70efdf 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1087,10 +1087,8 @@ static void put_osd(struct ceph_osd *osd)
dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
atomic_read(&osd->o_ref) - 1);
if (atomic_dec_and_test(&osd->o_ref)) {
- struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
-
if (osd->o_auth.authorizer)
- ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
+ ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
kfree(osd);
}
}
@@ -2984,7 +2982,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_auth_handshake *auth = &o->o_auth;
if (force_new && auth->authorizer) {
- ceph_auth_destroy_authorizer(ac, auth->authorizer);
+ ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
diff --git a/net/core/dev.c b/net/core/dev.c
index d91dfbec0fc6..e98ba63fe280 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2815,7 +2815,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, type)) {
- features &= ~NETIF_F_CSUM_MASK;
+ features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
} else if (illegal_highdma(skb->dev, skb)) {
features &= ~NETIF_F_SG;
}
@@ -6721,6 +6721,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~NETIF_F_TSO6;
}
+ /* TSO with IPv4 ID mangling requires IPv4 TSO be enabled */
+ if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
+ features &= ~NETIF_F_TSO_MANGLEID;
+
/* TSO ECN requires that TSO is present as well. */
if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
features &= ~NETIF_F_TSO_ECN;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7a1d48983f81..5586be93632f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3080,8 +3080,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
unsigned int headroom;
unsigned int len = head_skb->len;
__be16 proto;
- bool csum;
- int sg = !!(features & NETIF_F_SG);
+ bool csum, sg;
int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
@@ -3093,15 +3092,19 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
+ sg = !!(features & NETIF_F_SG);
csum = !!can_checksum_protocol(features, proto);
/* GSO partial only requires that we trim off any excess that
* doesn't fit into an MSS sized block, so take care of that
* now.
*/
- if (features & NETIF_F_GSO_PARTIAL) {
+ if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) {
partial_segs = len / mss;
- mss *= partial_segs;
+ if (partial_segs > 1)
+ mss *= partial_segs;
+ else
+ partial_segs = 0;
}
headroom = skb_headroom(head_skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index f615e9391170..08bf97eceeb3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1655,6 +1655,17 @@ void sock_wfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_wfree);
+/* This variant of sock_wfree() is used by TCP,
+ * since it sets SOCK_USE_WRITE_QUEUE.
+ */
+void __sock_wfree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
+ __sk_free(sk);
+}
+
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
skb_orphan(skb);
@@ -1677,8 +1688,21 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
}
EXPORT_SYMBOL(skb_set_owner_w);
+/* This helper is used by netem, as it can hold packets in its
+ * delay queue. We want to allow the owner socket to send more
+ * packets, as if they were already TX completed by a typical driver.
+ * But we also want to keep skb->sk set because some packet schedulers
+ * rely on it (sch_fq for example). So we set skb->truesize to a small
+ * amount (1) and decrease sk_wmem_alloc accordingly.
+ */
void skb_orphan_partial(struct sk_buff *skb)
{
+ /* If this skb is a TCP pure ACK or already went here,
+ * we have nothing to do. 2 is already a very small truesize.
+ */
+ if (skb->truesize <= 2)
+ return;
+
/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
* so we do not completely orphan skb, but transfert all
* accounted bytes but one, to avoid unexpected reorders.
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 371674801e84..d78e2eefc0f7 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -60,8 +60,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
+/* Fills in tpi and returns header length to be pulled. */
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
- bool *csum_err, int *ret_hdr_len)
+ bool *csum_err)
{
const struct gre_base_hdr *greh;
__be32 *options;
@@ -113,14 +114,10 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
*/
if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
tpi->proto = htons(ETH_P_IP);
- if ((*(u8 *)options & 0xF0) != 0x40) {
+ if ((*(u8 *)options & 0xF0) != 0x40)
hdr_len += 4;
- if (!pskb_may_pull(skb, hdr_len))
- return -EINVAL;
- }
}
- *ret_hdr_len = hdr_len;
- return 0;
+ return hdr_len;
}
EXPORT_SYMBOL(gre_parse_header);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3177211ab651..77c20a489218 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -438,6 +438,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
const struct sock *sk2,
bool match_wildcard))
{
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
@@ -446,6 +447,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
sk2->sk_family == sk->sk_family &&
ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+ inet_csk(sk2)->icsk_bind_hash == tb &&
sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
saddr_same(sk, sk2, false))
return reuseport_add_sock(sk, sk2);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2480d79b0e37..2b267e71ebf5 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -221,16 +221,12 @@ static void gre_err(struct sk_buff *skb, u32 info)
const int code = icmp_hdr(skb)->code;
struct tnl_ptk_info tpi;
bool csum_err = false;
- int hdr_len;
- if (gre_parse_header(skb, &tpi, &csum_err, &hdr_len)) {
+ if (gre_parse_header(skb, &tpi, &csum_err) < 0) {
if (!csum_err) /* ignore csum errors. */
return;
}
- if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
- return;
-
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
skb->dev->ifindex, 0, IPPROTO_GRE, 0);
@@ -264,24 +260,22 @@ static __be32 tunnel_id_to_key(__be64 x)
#endif
}
-static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
+static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
- struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
- struct ip_tunnel_net *itn;
const struct iphdr *iph;
struct ip_tunnel *tunnel;
- if (tpi->proto == htons(ETH_P_TEB))
- itn = net_generic(net, gre_tap_net_id);
- else
- itn = net_generic(net, ipgre_net_id);
-
iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
+ if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
+ raw_proto, false) < 0)
+ goto drop;
+
skb_pop_mac_header(skb);
if (tunnel->collect_md) {
__be16 flags;
@@ -297,7 +291,34 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD;
}
- return PACKET_REJECT;
+ return PACKET_NEXT;
+
+drop:
+ kfree_skb(skb);
+ return PACKET_RCVD;
+}
+
+static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ int hdr_len)
+{
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn;
+ int res;
+
+ if (tpi->proto == htons(ETH_P_TEB))
+ itn = net_generic(net, gre_tap_net_id);
+ else
+ itn = net_generic(net, ipgre_net_id);
+
+ res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
+ if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
+ /* ipgre tunnels in collect metadata mode should receive
+ * also ETH_P_TEB traffic.
+ */
+ itn = net_generic(net, ipgre_net_id);
+ res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
+ }
+ return res;
}
static int gre_rcv(struct sk_buff *skb)
@@ -314,13 +335,11 @@ static int gre_rcv(struct sk_buff *skb)
}
#endif
- if (gre_parse_header(skb, &tpi, &csum_err, &hdr_len) < 0)
- goto drop;
-
- if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
+ hdr_len = gre_parse_header(skb, &tpi, &csum_err);
+ if (hdr_len < 0)
goto drop;
- if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
+ if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -369,7 +388,8 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
return ip_route_output_key(net, fl);
}
-static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
@@ -418,7 +438,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_rt;
flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
- gre_build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
+ gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key(tun_info->key.tun_id), 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -459,7 +479,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
const struct iphdr *tnl_params;
if (tunnel->collect_md) {
- gre_fb_xmit(skb, dev);
+ gre_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
}
@@ -501,7 +521,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
if (tunnel->collect_md) {
- gre_fb_xmit(skb, dev);
+ gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
return NETDEV_TX_OK;
}
@@ -732,7 +752,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
netif_keep_dst(dev);
dev->addr_len = 4;
- if (iph->daddr) {
+ if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
if (!iph->saddr)
@@ -741,8 +761,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
dev->header_ops = &ipgre_header_ops;
}
#endif
- } else
+ } else if (!tunnel->collect_md) {
dev->header_ops = &ipgre_header_ops;
+ }
return ip_tunnel_init(dev);
}
@@ -785,6 +806,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
if (flags & (GRE_VERSION|GRE_ROUTING))
return -EINVAL;
+ if (data[IFLA_GRE_COLLECT_METADATA] &&
+ data[IFLA_GRE_ENCAP_TYPE] &&
+ nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
+ return -EINVAL;
+
return 0;
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6aad0192443d..a69ed94bda1b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -326,12 +326,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
- dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
- fl4.saddr);
ip_rt_put(rt);
}
if (dev->type != ARPHRD_ETHER)
dev->flags |= IFF_POINTOPOINT;
+
+ dst_cache_reset(&tunnel->dst_cache);
}
if (!tdev && tunnel->parms.link)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b945c2b046c5..5c7ed147449c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1084,6 +1084,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
struct sockcm_cookie sockc;
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
+ bool process_backlog = false;
bool sg;
long timeo;
@@ -1167,9 +1168,10 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- if (sk_flush_backlog(sk))
+ if (process_backlog && sk_flush_backlog(sk)) {
+ process_backlog = false;
goto restart;
-
+ }
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg),
sk->sk_allocation,
@@ -1177,6 +1179,7 @@ new_segment:
if (!skb)
goto wait_for_memory;
+ process_backlog = true;
/*
* Check whether we can use HW checksum.
*/
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 25d527922b18..8daefd8b1b49 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -949,7 +949,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
+ skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 11e875ffd7ac..3f8411328de5 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -218,6 +218,7 @@ config IPV6_GRE
tristate "IPv6: GRE tunnel"
select IPV6_TUNNEL
select NET_IP_TUNNEL
+ depends on NET_IPGRE_DEMUX
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ea9ee5cce5cf..00d0c2903173 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -727,14 +727,13 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
struct msghdr *msg, struct flowi6 *fl6,
- struct ipv6_txoptions *opt,
- int *hlimit, int *tclass, int *dontfrag,
- struct sockcm_cookie *sockc)
+ struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc)
{
struct in6_pktinfo *src_info;
struct cmsghdr *cmsg;
struct ipv6_rt_hdr *rthdr;
struct ipv6_opt_hdr *hdr;
+ struct ipv6_txoptions *opt = ipc6->opt;
int len;
int err = 0;
@@ -953,8 +952,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
}
- *hlimit = *(int *)CMSG_DATA(cmsg);
- if (*hlimit < -1 || *hlimit > 0xff) {
+ ipc6->hlimit = *(int *)CMSG_DATA(cmsg);
+ if (ipc6->hlimit < -1 || ipc6->hlimit > 0xff) {
err = -EINVAL;
goto exit_f;
}
@@ -974,7 +973,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
err = 0;
- *tclass = tc;
+ ipc6->tclass = tc;
break;
}
@@ -992,7 +991,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
goto exit_f;
err = 0;
- *dontfrag = df;
+ ipc6->dontfrag = df;
break;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 23b9a4cc418e..9554b99a8508 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -401,10 +401,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
struct flowi6 fl6;
struct icmpv6_msg msg;
struct sockcm_cookie sockc_unused = {0};
+ struct ipcm6_cookie ipc6;
int iif = 0;
int addr_type = 0;
int len;
- int hlimit;
int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
@@ -507,7 +507,10 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
if (IS_ERR(dst))
goto out;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.tclass = np->tclass;
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
msg.skb = skb;
msg.offset = skb_network_offset(skb);
@@ -526,9 +529,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
err = ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), hlimit,
- np->tclass, NULL, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, np->dontfrag, &sockc_unused);
+ sizeof(struct icmp6hdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ MSG_DONTWAIT, &sockc_unused);
if (err) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -563,9 +566,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct flowi6 fl6;
struct icmpv6_msg msg;
struct dst_entry *dst;
+ struct ipcm6_cookie ipc6;
int err = 0;
- int hlimit;
- u8 tclass;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
struct sockcm_cookie sockc_unused = {0};
@@ -607,19 +609,21 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (IS_ERR(dst))
goto out;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-
idev = __in6_dev_get(skb->dev);
msg.skb = skb;
msg.offset = 0;
msg.type = ICMPV6_ECHO_REPLY;
- tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
+
err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
+ sizeof(struct icmp6hdr), &ipc6, &fl6,
(struct rt6_info *)dst, MSG_DONTWAIT,
- np->dontfrag, &sockc_unused);
+ &sockc_unused);
if (err) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 4985e1a735a6..17038e1ede98 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -144,8 +144,7 @@ nla_put_failure:
static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
{
- /* No encapsulation overhead */
- return 0;
+ return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */
}
static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 35d3ddc328f8..b912f0dbaf72 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -373,7 +373,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
struct msghdr msg;
struct flowi6 flowi6;
struct sockcm_cookie sockc_junk;
- int junk;
+ struct ipcm6_cookie ipc6;
err = -ENOMEM;
fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
@@ -390,8 +390,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
msg.msg_control = (void *)(fl->opt+1);
memset(&flowi6, 0, sizeof(flowi6));
- err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt,
- &junk, &junk, &junk, &sockc_junk);
+ ipc6.opt = fl->opt;
+ err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk);
if (err)
goto done;
err = -EINVAL;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 10127741a60d..47b671a46dc4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -468,7 +468,8 @@ static int gre_rcv(struct sk_buff *skb)
bool csum_err = false;
int hdr_len;
- if (gre_parse_header(skb, &tpi, &csum_err, &hdr_len) < 0)
+ hdr_len = gre_parse_header(skb, &tpi, &csum_err);
+ if (hdr_len < 0)
goto drop;
if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2b3ffc582d16..cbf127ae7c67 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1182,12 +1182,12 @@ static void ip6_append_data_mtu(unsigned int *mtu,
}
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
- struct inet6_cork *v6_cork,
- int hlimit, int tclass, struct ipv6_txoptions *opt,
+ struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
struct rt6_info *rt, struct flowi6 *fl6)
{
struct ipv6_pinfo *np = inet6_sk(sk);
unsigned int mtu;
+ struct ipv6_txoptions *opt = ipc6->opt;
/*
* setup for corking
@@ -1229,8 +1229,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
dst_hold(&rt->dst);
cork->base.dst = &rt->dst;
cork->fl.u.ip6 = *fl6;
- v6_cork->hop_limit = hlimit;
- v6_cork->tclass = tclass;
+ v6_cork->hop_limit = ipc6->hlimit;
+ v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
rt->dst.dev->mtu : dst_mtu(&rt->dst);
@@ -1258,7 +1258,7 @@ static int __ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- unsigned int flags, int dontfrag,
+ unsigned int flags, struct ipcm6_cookie *ipc6,
const struct sockcm_cookie *sockc)
{
struct sk_buff *skb, *skb_prev = NULL;
@@ -1298,7 +1298,7 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
- if (cork->length + length > mtu - headersize && dontfrag &&
+ if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_RAW)) {
ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
@@ -1564,9 +1564,9 @@ error:
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen, int hlimit,
- int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags, int dontfrag,
+ void *from, int length, int transhdrlen,
+ struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
+ struct rt6_info *rt, unsigned int flags,
const struct sockcm_cookie *sockc)
{
struct inet_sock *inet = inet_sk(sk);
@@ -1580,12 +1580,12 @@ int ip6_append_data(struct sock *sk,
/*
* setup for corking
*/
- err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
- tclass, opt, rt, fl6);
+ err = ip6_setup_cork(sk, &inet->cork, &np->cork,
+ ipc6, rt, fl6);
if (err)
return err;
- exthdrlen = (opt ? opt->opt_flen : 0);
+ exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
} else {
@@ -1595,8 +1595,7 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
&np->cork, sk_page_frag(sk), getfrag,
- from, length, transhdrlen, flags, dontfrag,
- sockc);
+ from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -1752,15 +1751,14 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- int hlimit, int tclass,
- struct ipv6_txoptions *opt, struct flowi6 *fl6,
+ struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
- int dontfrag, const struct sockcm_cookie *sockc)
+ const struct sockcm_cookie *sockc)
{
struct inet_cork_full cork;
struct inet6_cork v6_cork;
struct sk_buff_head queue;
- int exthdrlen = (opt ? opt->opt_flen : 0);
+ int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
int err;
if (flags & MSG_PROBE)
@@ -1772,17 +1770,17 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
cork.base.addr = 0;
cork.base.opt = NULL;
v6_cork.opt = NULL;
- err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
+ err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
if (err)
return ERR_PTR(err);
- if (dontfrag < 0)
- dontfrag = inet6_sk(sk)->dontfrag;
+ if (ipc6->dontfrag < 0)
+ ipc6->dontfrag = inet6_sk(sk)->dontfrag;
err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
- flags, dontfrag, sockc);
+ flags, ipc6, sockc);
if (err) {
__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
return ERR_PTR(err);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4ff4b29894eb..a9895e15ee9c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -473,7 +473,7 @@ sticky_done:
struct msghdr msg;
struct flowi6 fl6;
struct sockcm_cookie sockc_junk;
- int junk;
+ struct ipcm6_cookie ipc6;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -503,9 +503,9 @@ sticky_done:
msg.msg_controllen = optlen;
msg.msg_control = (void *)(opt+1);
+ ipc6.opt = opt;
- retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk,
- &junk, &junk, &sockc_junk);
+ retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk);
if (retv)
goto done;
update:
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index da1cff79e447..3ee3e444a66b 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -58,11 +58,11 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int iif = 0;
struct flowi6 fl6;
int err;
- int hlimit;
struct dst_entry *dst;
struct rt6_info *rt;
struct pingfakehdr pfh;
struct sockcm_cookie junk = {0};
+ struct ipcm6_cookie ipc6;
pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -139,13 +139,15 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
pfh.wcheck = 0;
pfh.family = AF_INET6;
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ ipc6.tclass = np->tclass;
+ ipc6.dontfrag = np->dontfrag;
+ ipc6.opt = NULL;
lock_sock(sk);
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
- 0, hlimit,
- np->tclass, NULL, &fl6, rt,
- MSG_DONTWAIT, np->dontfrag, &junk);
+ 0, &ipc6, &fl6, rt,
+ MSG_DONTWAIT, &junk);
if (err) {
ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index b07ce21983aa..896350df6423 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -746,10 +746,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct raw6_frag_vec rfv;
struct flowi6 fl6;
struct sockcm_cookie sockc;
+ struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
- int hlimit = -1;
- int tclass = -1;
- int dontfrag = -1;
u16 proto;
int err;
@@ -770,6 +768,11 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_mark = sk->sk_mark;
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
+ ipc6.opt = NULL;
+
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -827,10 +830,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
+ ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag,
- &sockc);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -846,7 +848,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!opt) {
opt = txopt_get(np);
opt_to_free = opt;
- }
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -881,14 +883,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
err = PTR_ERR(dst);
goto out;
}
- if (hlimit < 0)
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (tclass < 0)
- tclass = np->tclass;
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
+ if (ipc6.dontfrag < 0)
+ ipc6.dontfrag = np->dontfrag;
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
@@ -897,10 +899,11 @@ back_from_confirm:
if (inet->hdrincl)
err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
else {
+ ipc6.opt = opt;
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
- len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag, &sockc);
+ len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
+ msg->msg_flags, &sockc);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f911c63f79e6..aca06094110f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1064,11 +1064,9 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
+ struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int ulen = len;
- int hlimit = -1;
- int tclass = -1;
- int dontfrag = -1;
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int err;
int connected = 0;
@@ -1076,6 +1074,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sockcm_cookie sockc;
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
+
/* destination address check */
if (sin6) {
if (addr_len < offsetof(struct sockaddr, sa_data))
@@ -1200,10 +1202,9 @@ do_udp_sendmsg:
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
+ ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag,
- &sockc);
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1224,6 +1225,7 @@ do_udp_sendmsg:
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
+ ipc6.opt = opt;
fl6.flowi6_proto = sk->sk_protocol;
if (!ipv6_addr_any(daddr))
@@ -1253,11 +1255,11 @@ do_udp_sendmsg:
goto out;
}
- if (hlimit < 0)
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (tclass < 0)
- tclass = np->tclass;
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
@@ -1268,9 +1270,9 @@ back_from_confirm:
struct sk_buff *skb;
skb = ip6_make_skb(sk, getfrag, msg, ulen,
- sizeof(struct udphdr), hlimit, tclass, opt,
+ sizeof(struct udphdr), &ipc6,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag, &sockc);
+ msg->msg_flags, &sockc);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
@@ -1291,14 +1293,12 @@ back_from_confirm:
up->pending = AF_INET6;
do_append_data:
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
+ if (ipc6.dontfrag < 0)
+ ipc6.dontfrag = np->dontfrag;
up->len += ulen;
- err = ip6_append_data(sk, getfrag, msg, ulen,
- sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
- (struct rt6_info *)dst,
- corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag,
- &sockc);
+ err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index fcfbe579434a..d8b7267280c3 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -181,7 +181,7 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
skb = new_skb;
}
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
len = skb->len;
/* Now queue the packet in the transport layer */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index afca2eb4dfa7..6edfa9980314 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1376,9 +1376,9 @@ static int l2tp_tunnel_sock_create(struct net *net,
memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
sizeof(udp_conf.peer_ip6));
udp_conf.use_udp6_tx_checksums =
- cfg->udp6_zero_tx_checksums;
+ ! cfg->udp6_zero_tx_checksums;
udp_conf.use_udp6_rx_checksums =
- cfg->udp6_zero_rx_checksums;
+ ! cfg->udp6_zero_rx_checksums;
} else
#endif
{
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 46e07267e503..c6f5df1bed12 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -495,10 +495,8 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct dst_entry *dst = NULL;
struct flowi6 fl6;
struct sockcm_cookie sockc_unused = {0};
+ struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
- int hlimit = -1;
- int tclass = -1;
- int dontfrag = -1;
int transhdrlen = 4; /* zero session-id */
int ulen = len + transhdrlen;
int err;
@@ -520,6 +518,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_mark = sk->sk_mark;
+ ipc6.hlimit = -1;
+ ipc6.tclass = -1;
+ ipc6.dontfrag = -1;
+
if (lsa) {
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -564,11 +566,11 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(struct ipv6_txoptions);
+ ipc6.opt = opt;
- err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
- &hlimit, &tclass, &dontfrag,
- &sockc_unused);
- if (err < 0) {
+ err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6,
+ &sockc_unused);
+ if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
@@ -588,6 +590,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
+ ipc6.opt = opt;
fl6.flowi6_proto = sk->sk_protocol;
if (!ipv6_addr_any(daddr))
@@ -612,14 +615,14 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out;
}
- if (hlimit < 0)
- hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
+ if (ipc6.hlimit < 0)
+ ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (tclass < 0)
- tclass = np->tclass;
+ if (ipc6.tclass < 0)
+ ipc6.tclass = np->tclass;
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
+ if (ipc6.dontfrag < 0)
+ ipc6.dontfrag = np->dontfrag;
if (msg->msg_flags & MSG_CONFIRM)
goto do_confirm;
@@ -627,9 +630,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
- ulen, transhdrlen, hlimit, tclass, opt,
+ ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag, &sockc_unused);
+ msg->msg_flags, &sockc_unused);
if (err)
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6a33f0b4d839..c59af3eb9fa4 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1761,7 +1761,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = dev_alloc_name(ndev, ndev->name);
if (ret < 0) {
- free_netdev(ndev);
+ ieee80211_if_free(ndev);
return ret;
}
@@ -1847,7 +1847,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = register_netdevice(ndev);
if (ret) {
- free_netdev(ndev);
+ ieee80211_if_free(ndev);
return ret;
}
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 61ed2a8764ba..86187dad1440 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -127,7 +127,7 @@ void rds_tcp_restore_callbacks(struct socket *sock,
/*
* This is the only path that sets tc->t_sock. Send and receive trust that
- * it is set. The RDS_CONN_CONNECTED bit protects those paths from being
+ * it is set. The RDS_CONN_UP bit protects those paths from being
* called while it isn't set.
*/
void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
@@ -216,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
if (!tc)
return -ENOMEM;
+ mutex_init(&tc->t_conn_lock);
tc->t_sock = NULL;
tc->t_tinc = NULL;
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 64f873c0c6b6..41c228300525 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,10 @@ struct rds_tcp_connection {
struct list_head t_tcp_node;
struct rds_connection *conn;
+ /* t_conn_lock synchronizes the connection establishment between
+ * rds_tcp_accept_one and rds_tcp_conn_connect
+ */
+ struct mutex t_conn_lock;
struct socket *t_sock;
void *t_orig_write_space;
void *t_orig_data_ready;
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 5cb16875c460..49a3fcfed360 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
struct socket *sock = NULL;
struct sockaddr_in src, dest;
int ret;
+ struct rds_tcp_connection *tc = conn->c_transport_data;
+
+ mutex_lock(&tc->t_conn_lock);
+ if (rds_conn_up(conn)) {
+ mutex_unlock(&tc->t_conn_lock);
+ return 0;
+ }
ret = sock_create_kern(rds_conn_net(conn), PF_INET,
SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret < 0)
@@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
}
out:
+ mutex_unlock(&tc->t_conn_lock);
if (sock)
sock_release(sock);
return ret;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 0936a4a32b47..be263cdf268b 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock)
struct rds_connection *conn;
int ret;
struct inet_sock *inet;
- struct rds_tcp_connection *rs_tcp;
+ struct rds_tcp_connection *rs_tcp = NULL;
+ int conn_state;
+ struct sock *nsk;
ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
sock->sk->sk_type, sock->sk->sk_protocol,
@@ -115,28 +117,44 @@ int rds_tcp_accept_one(struct socket *sock)
* rds_tcp_state_change() will do that cleanup
*/
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
- if (rs_tcp->t_sock &&
- ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
- struct sock *nsk = new_sock->sk;
-
- nsk->sk_user_data = NULL;
- nsk->sk_prot->disconnect(nsk, 0);
- tcp_done(nsk);
- new_sock = NULL;
- ret = 0;
- goto out;
- } else if (rs_tcp->t_sock) {
- rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
- conn->c_outgoing = 0;
- }
-
rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
+ mutex_lock(&rs_tcp->t_conn_lock);
+ conn_state = rds_conn_state(conn);
+ if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
+ goto rst_nsk;
+ if (rs_tcp->t_sock) {
+ /* Need to resolve a duelling SYN between peers.
+ * We have an outstanding SYN to this peer, which may
+ * potentially have transitioned to the RDS_CONN_UP state,
+ * so we must quiesce any send threads before resetting
+ * c_transport_data.
+ */
+ wait_event(conn->c_waitq,
+ !test_bit(RDS_IN_XMIT, &conn->c_flags));
+ if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
+ goto rst_nsk;
+ } else if (rs_tcp->t_sock) {
+ rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
+ conn->c_outgoing = 0;
+ }
+ }
rds_tcp_set_callbacks(new_sock, conn);
- rds_connect_complete(conn);
+ rds_connect_complete(conn); /* marks RDS_CONN_UP */
+ new_sock = NULL;
+ ret = 0;
+ goto out;
+rst_nsk:
+ /* reset the newly returned accept sock and bail */
+ nsk = new_sock->sk;
+ rds_tcp_stats_inc(s_tcp_listen_closed_stale);
+ nsk->sk_user_data = NULL;
+ nsk->sk_prot->disconnect(nsk, 0);
+ tcp_done(nsk);
new_sock = NULL;
ret = 0;
-
out:
+ if (rs_tcp)
+ mutex_unlock(&rs_tcp->t_conn_lock);
if (new_sock)
sock_release(new_sock);
return ret;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9c7756237904..269dd71b3828 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -227,13 +227,12 @@ unsigned long dev_trans_start(struct net_device *dev)
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
- res = dev->trans_start;
- for (i = 0; i < dev->num_tx_queues; i++) {
+ res = netdev_get_tx_queue(dev, 0)->trans_start;
+ for (i = 1; i < dev->num_tx_queues; i++) {
val = netdev_get_tx_queue(dev, i)->trans_start;
if (val && time_after(val, res))
res = val;
}
- dev->trans_start = res;
return res;
}
@@ -256,10 +255,7 @@ static void dev_watchdog(unsigned long arg)
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
- /*
- * old device drivers set dev->trans_start
- */
- trans_start = txq->trans_start ? : dev->trans_start;
+ trans_start = txq->trans_start;
if (netif_xmit_stopped(txq) &&
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
@@ -775,7 +771,7 @@ void dev_activate(struct net_device *dev)
transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
if (need_watchdog) {
- dev->trans_start = jiffies;
+ netif_trans_update(dev);
dev_watchdog_up(dev);
}
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 491d6fd6430c..205bed00dd34 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
sch->q.qlen++;
}
+/* netem can't properly corrupt a megapacket (like we get from GSO), so instead
+ * when we statistically choose to corrupt one, we instead segment it, returning
+ * the first packet to be corrupted, and re-enqueue the remaining frames
+ */
+static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct sk_buff *segs;
+ netdev_features_t features = netif_skb_features(skb);
+
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+
+ if (IS_ERR_OR_NULL(segs)) {
+ qdisc_reshape_fail(skb, sch);
+ return NULL;
+ }
+ consume_skb(skb);
+ return segs;
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* We don't fill cb now as skb_unshare() may invalidate it */
struct netem_skb_cb *cb;
struct sk_buff *skb2;
+ struct sk_buff *segs = NULL;
+ unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
+ int nb = 0;
int count = 1;
+ int rc = NET_XMIT_SUCCESS;
/* Random duplication */
if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* do it now in software before we mangle it.
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (skb_is_gso(skb)) {
+ segs = netem_segment(skb, sch);
+ if (!segs)
+ return NET_XMIT_DROP;
+ } else {
+ segs = skb;
+ }
+
+ skb = segs;
+ segs = segs->next;
+
if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
(skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(skb)))
- return qdisc_drop(skb, sch);
+ skb_checksum_help(skb))) {
+ rc = qdisc_drop(skb, sch);
+ goto finish_segs;
+ }
skb->data[prandom_u32() % skb_headlen(skb)] ^=
1<<(prandom_u32() % 8);
@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
sch->qstats.requeues++;
}
+finish_segs:
+ if (segs) {
+ while (segs) {
+ skb2 = segs->next;
+ segs->next = NULL;
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ last_len = segs->len;
+ rc = qdisc_enqueue(segs, sch);
+ if (rc != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(rc))
+ qdisc_qstats_drop(sch);
+ } else {
+ nb++;
+ len += last_len;
+ }
+ segs = skb2;
+ }
+ sch->q.qlen += nb;
+ if (nb > 1)
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+ }
return NET_XMIT_SUCCESS;
}
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e2bdb07a49a2..fe1b062c4f18 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -112,11 +112,9 @@ static int __init tipc_init(void)
pr_info("Activated (version " TIPC_MOD_VER ")\n");
- sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 <<
- TIPC_LOW_IMPORTANCE;
- sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 <<
- TIPC_CRITICAL_IMPORTANCE;
- sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT;
+ sysctl_tipc_rmem[0] = RCVBUF_MIN;
+ sysctl_tipc_rmem[1] = RCVBUF_DEF;
+ sysctl_tipc_rmem[2] = RCVBUF_MAX;
err = tipc_netlink_start();
if (err)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 58bf51541813..024da8af91f0 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -743,16 +743,26 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n)
msg_set_bits(m, 9, 16, 0xffff, n);
}
-static inline u32 msg_bcast_tag(struct tipc_msg *m)
+static inline u32 msg_conn_ack(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff);
}
-static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n)
+static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
{
msg_set_bits(m, 9, 16, 0xffff, n);
}
+static inline u32 msg_adv_win(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 0, 0xffff);
+}
+
+static inline void msg_set_adv_win(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 9, 0, 0xffff, n);
+}
+
static inline u32 msg_max_pkt(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff) * 4;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c29915688230..d903f560e2fd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1,7 +1,7 @@
/*
* net/tipc/node.c: TIPC node management routines
*
- * Copyright (c) 2000-2006, 2012-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2012-2016, Ericsson AB
* Copyright (c) 2005-2006, 2010-2014, Wind River Systems
* All rights reserved.
*
@@ -191,6 +191,20 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
tipc_node_put(n);
return mtu;
}
+
+u16 tipc_node_get_capabilities(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+ u16 caps;
+
+ n = tipc_node_find(net, addr);
+ if (unlikely(!n))
+ return TIPC_NODE_CAPABILITIES;
+ caps = n->capabilities;
+ tipc_node_put(n);
+ return caps;
+}
+
/*
* A trivial power-of-two bitmask technique is used for speed, since this
* operation is done for every incoming TIPC packet. The number of hash table
@@ -304,8 +318,11 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
spin_lock_bh(&tn->node_list_lock);
n = tipc_node_find(net, addr);
- if (n)
+ if (n) {
+ /* Same node may come back with new capabilities */
+ n->capabilities = capabilities;
goto exit;
+ }
n = kzalloc(sizeof(*n), GFP_ATOMIC);
if (!n) {
pr_warn("Node creation failed, no memory\n");
@@ -1452,6 +1469,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
int bearer_id = b->identity;
struct tipc_link_entry *le;
u16 bc_ack = msg_bcast_ack(hdr);
+ u32 self = tipc_own_addr(net);
int rc = 0;
__skb_queue_head_init(&xmitq);
@@ -1468,6 +1486,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
return tipc_node_bc_rcv(net, skb, bearer_id);
}
+ /* Discard unicast link messages destined for another node */
+ if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self)))
+ goto discard;
+
/* Locate neighboring node that sent packet */
n = tipc_node_find(net, msg_prevnode(hdr));
if (unlikely(!n))
diff --git a/net/tipc/node.h b/net/tipc/node.h
index f39d9d06e8bb..8264b3d97dc4 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,10 +45,11 @@
/* Optional capabilities supported by this code version
*/
enum {
- TIPC_BCAST_SYNCH = (1 << 1)
+ TIPC_BCAST_SYNCH = (1 << 1),
+ TIPC_BLOCK_FLOWCTL = (2 << 1)
};
-#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH
+#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL)
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
@@ -70,6 +71,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3eeb50a27b89..12628890c219 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -96,8 +96,11 @@ struct tipc_sock {
uint conn_timeout;
atomic_t dupl_rcvcnt;
bool link_cong;
- uint sent_unacked;
- uint rcv_unacked;
+ u16 snt_unacked;
+ u16 snd_win;
+ u16 peer_caps;
+ u16 rcv_unacked;
+ u16 rcv_win;
struct sockaddr_tipc remote;
struct rhash_head node;
struct rcu_head rcu;
@@ -227,9 +230,29 @@ static struct tipc_sock *tipc_sk(const struct sock *sk)
return container_of(sk, struct tipc_sock, sk);
}
-static int tsk_conn_cong(struct tipc_sock *tsk)
+static bool tsk_conn_cong(struct tipc_sock *tsk)
{
- return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN;
+ return tsk->snt_unacked >= tsk->snd_win;
+}
+
+/* tsk_blocks(): translate a buffer size in bytes to number of
+ * advertisable blocks, taking into account the ratio truesize(len)/len
+ * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
+ */
+static u16 tsk_adv_blocks(int len)
+{
+ return len / FLOWCTL_BLK_SZ / 4;
+}
+
+/* tsk_inc(): increment counter for sent or received data
+ * - If block based flow control is not supported by peer we
+ * fall back to message based ditto, incrementing the counter
+ */
+static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
+{
+ if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
+ return ((msglen / FLOWCTL_BLK_SZ) + 1);
+ return 1;
}
/**
@@ -377,9 +400,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
- tsk->sent_unacked = 0;
atomic_set(&tsk->dupl_rcvcnt, 0);
+ /* Start out with safe limits until we receive an advertised window */
+ tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
+ tsk->rcv_win = tsk->snd_win;
+
if (sock->state == SS_READY) {
tsk_set_unreturnable(tsk, true);
if (sock->type == SOCK_DGRAM)
@@ -775,7 +801,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
struct sock *sk = &tsk->sk;
struct tipc_msg *hdr = buf_msg(skb);
int mtyp = msg_type(hdr);
- int conn_cong;
+ bool conn_cong;
/* Ignore if connection cannot be validated: */
if (!tsk_peer_msg(tsk, hdr))
@@ -789,7 +815,9 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
return;
} else if (mtyp == CONN_ACK) {
conn_cong = tsk_conn_cong(tsk);
- tsk->sent_unacked -= msg_msgcnt(hdr);
+ tsk->snt_unacked -= msg_conn_ack(hdr);
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
+ tsk->snd_win = msg_adv_win(hdr);
if (conn_cong)
sk->sk_write_space(sk);
} else if (mtyp != CONN_PROBE_REPLY) {
@@ -1020,12 +1048,14 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
u32 dnode;
uint mtu, send, sent = 0;
struct iov_iter save;
+ int hlen = MIN_H_SIZE;
/* Handle implied connection establishment */
if (unlikely(dest)) {
rc = __tipc_sendmsg(sock, m, dsz);
+ hlen = msg_hdr_sz(mhdr);
if (dsz && (dsz == rc))
- tsk->sent_unacked = 1;
+ tsk->snt_unacked = tsk_inc(tsk, dsz + hlen);
return rc;
}
if (dsz > (uint)INT_MAX)
@@ -1054,7 +1084,7 @@ next:
if (likely(!tsk_conn_cong(tsk))) {
rc = tipc_node_xmit(net, &pktchain, dnode, portid);
if (likely(!rc)) {
- tsk->sent_unacked++;
+ tsk->snt_unacked += tsk_inc(tsk, send + hlen);
sent += send;
if (sent == dsz)
return dsz;
@@ -1118,6 +1148,13 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
+ tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
+ return;
+
+ /* Fall back to message based flow control */
+ tsk->rcv_win = FLOWCTL_MSG_WIN;
+ tsk->snd_win = FLOWCTL_MSG_WIN;
}
/**
@@ -1214,7 +1251,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
return 0;
}
-static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
+static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
struct net *net = sock_net(&tsk->sk);
struct sk_buff *skb = NULL;
@@ -1230,7 +1267,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
if (!skb)
return;
msg = buf_msg(skb);
- msg_set_msgcnt(msg, ack);
+ msg_set_conn_ack(msg, tsk->rcv_unacked);
+ tsk->rcv_unacked = 0;
+
+ /* Adjust to and advertize the correct window limit */
+ if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
+ tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
+ msg_set_adv_win(msg, tsk->rcv_win);
+ }
tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}
@@ -1288,7 +1332,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
long timeo;
unsigned int sz;
u32 err;
- int res;
+ int res, hlen;
/* Catch invalid receive requests */
if (unlikely(!buf_len))
@@ -1313,6 +1357,7 @@ restart:
buf = skb_peek(&sk->sk_receive_queue);
msg = buf_msg(buf);
sz = msg_data_sz(msg);
+ hlen = msg_hdr_sz(msg);
err = msg_errcode(msg);
/* Discard an empty non-errored message & try again */
@@ -1335,7 +1380,7 @@ restart:
sz = buf_len;
m->msg_flags |= MSG_TRUNC;
}
- res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz);
+ res = skb_copy_datagram_msg(buf, hlen, m, sz);
if (res)
goto exit;
res = sz;
@@ -1347,15 +1392,15 @@ restart:
res = -ECONNRESET;
}
- /* Consume received message (optional) */
- if (likely(!(flags & MSG_PEEK))) {
- if ((sock->state != SS_READY) &&
- (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
- tipc_sk_send_ack(tsk, tsk->rcv_unacked);
- tsk->rcv_unacked = 0;
- }
- tsk_advance_rx_queue(sk);
+ if (unlikely(flags & MSG_PEEK))
+ goto exit;
+
+ if (likely(sock->state != SS_READY)) {
+ tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
+ if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
+ tipc_sk_send_ack(tsk);
}
+ tsk_advance_rx_queue(sk);
exit:
release_sock(sk);
return res;
@@ -1384,7 +1429,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
int sz_to_copy, target, needed;
int sz_copied = 0;
u32 err;
- int res = 0;
+ int res = 0, hlen;
/* Catch invalid receive attempts */
if (unlikely(!buf_len))
@@ -1410,6 +1455,7 @@ restart:
buf = skb_peek(&sk->sk_receive_queue);
msg = buf_msg(buf);
sz = msg_data_sz(msg);
+ hlen = msg_hdr_sz(msg);
err = msg_errcode(msg);
/* Discard an empty non-errored message & try again */
@@ -1434,8 +1480,7 @@ restart:
needed = (buf_len - sz_copied);
sz_to_copy = (sz <= needed) ? sz : needed;
- res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset,
- m, sz_to_copy);
+ res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
if (res)
goto exit;
@@ -1457,20 +1502,18 @@ restart:
res = -ECONNRESET;
}
- /* Consume received message (optional) */
- if (likely(!(flags & MSG_PEEK))) {
- if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
- tipc_sk_send_ack(tsk, tsk->rcv_unacked);
- tsk->rcv_unacked = 0;
- }
- tsk_advance_rx_queue(sk);
- }
+ if (unlikely(flags & MSG_PEEK))
+ goto exit;
+
+ tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
+ if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
+ tipc_sk_send_ack(tsk);
+ tsk_advance_rx_queue(sk);
/* Loop around if more data is required */
if ((sz_copied < buf_len) && /* didn't get all requested data */
(!skb_queue_empty(&sk->sk_receive_queue) ||
(sz_copied < target)) && /* and more is ready or required */
- (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
(!err)) /* and haven't reached a FIN */
goto restart;
@@ -1602,30 +1645,33 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
/**
* rcvbuf_limit - get proper overload limit of socket receive queue
* @sk: socket
- * @buf: message
+ * @skb: message
*
- * For all connection oriented messages, irrespective of importance,
- * the default overload value (i.e. 67MB) is set as limit.
+ * For connection oriented messages, irrespective of importance,
+ * default queue limit is 2 MB.
*
- * For all connectionless messages, by default new queue limits are
- * as belows:
+ * For connectionless messages, queue limits are based on message
+ * importance as follows:
*
- * TIPC_LOW_IMPORTANCE (4 MB)
- * TIPC_MEDIUM_IMPORTANCE (8 MB)
- * TIPC_HIGH_IMPORTANCE (16 MB)
- * TIPC_CRITICAL_IMPORTANCE (32 MB)
+ * TIPC_LOW_IMPORTANCE (2 MB)
+ * TIPC_MEDIUM_IMPORTANCE (4 MB)
+ * TIPC_HIGH_IMPORTANCE (8 MB)
+ * TIPC_CRITICAL_IMPORTANCE (16 MB)
*
* Returns overload limit according to corresponding message importance
*/
-static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
+static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
{
- struct tipc_msg *msg = buf_msg(buf);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = buf_msg(skb);
+
+ if (unlikely(!msg_connected(hdr)))
+ return sk->sk_rcvbuf << msg_importance(hdr);
- if (msg_connected(msg))
- return sysctl_tipc_rmem[2];
+ if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
+ return sk->sk_rcvbuf;
- return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE <<
- msg_importance(msg);
+ return FLOWCTL_MSG_LIM;
}
/**
@@ -1748,7 +1794,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/* Try backlog, compensating for double-counted bytes */
dcnt = &tipc_sk(sk)->dupl_rcvcnt;
- if (sk->sk_backlog.len)
+ if (!sk->sk_backlog.len)
atomic_set(dcnt, 0);
lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
if (likely(!sk_add_backlog(sk, skb, lim)))
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 4241f22069dc..06fb5944cf76 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -1,6 +1,6 @@
/* net/tipc/socket.h: Include file for TIPC socket code
*
- * Copyright (c) 2014-2015, Ericsson AB
+ * Copyright (c) 2014-2016, Ericsson AB
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,10 +38,17 @@
#include <net/sock.h>
#include <net/genetlink.h>
-#define TIPC_CONNACK_INTV 256
-#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2)
-#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \
- SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
+/* Compatibility values for deprecated message based flow control */
+#define FLOWCTL_MSG_WIN 512
+#define FLOWCTL_MSG_LIM ((FLOWCTL_MSG_WIN * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE))
+
+#define FLOWCTL_BLK_SZ 1024
+
+/* Socket receive buffer sizes */
+#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512)
+#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2)
+#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16)
+
int tipc_socket_init(void);
void tipc_socket_stop(void);
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);