diff options
Diffstat (limited to 'net')
154 files changed, 2787 insertions, 2438 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 6e64f7c6a2e9..7850412f52b7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -327,7 +327,7 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { - unsigned long old_features = vlandev->features; + u32 old_features = vlandev->features; vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; diff --git a/net/Kconfig b/net/Kconfig index 72840626284b..79cabf1ee68b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -221,6 +221,12 @@ config RPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config RFS_ACCEL + boolean + depends on RPS && GENERIC_HARDIRQS + select CPU_RMAP + default y + config XPS boolean depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index d936aeccd194..2de93d00631b 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c index 3850a3ecf947..1997725a243b 100644 --- a/net/batman-adv/aggregation.c +++ b/net/batman-adv/aggregation.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h index 71a91b3da913..6ce305b40017 100644 --- a/net/batman-adv/aggregation.h +++ b/net/batman-adv/aggregation.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c index 0ae81d07f102..0e9d43509935 100644 --- a/net/batman-adv/bat_debugfs.c +++ b/net/batman-adv/bat_debugfs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -52,7 +52,6 @@ static void emit_log_char(struct debug_log *debug_log, char c) static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) { - int printed_len; va_list args; static char debug_log_buf[256]; char *p; @@ -62,8 +61,7 @@ static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) spin_lock_bh(&debug_log->lock); va_start(args, fmt); - printed_len = vscnprintf(debug_log_buf, sizeof(debug_log_buf), - fmt, args); + vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args); va_end(args); for (p = debug_log_buf; *p != 0; p++) diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h index 72df532b7d5f..bc9cda3f01e1 100644 --- a/net/batman-adv/bat_debugfs.h +++ b/net/batman-adv/bat_debugfs.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index cd7bb51825f1..f7b93a0805fe 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h index 7f186c007b4f..02f1fa7aadfa 100644 --- a/net/batman-adv/bat_sysfs.h +++ b/net/batman-adv/bat_sysfs.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index bbcd8f744cdd..ad2ca925b3e0 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index ac54017601b1..769c246d1fc1 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 0065ffb8d96d..429a013d2e0a 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 4585e6549844..2aa439124ee3 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index b962982f017e..50d3a59a3d73 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 5e728d0b7959..55e527a489fe 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4f95777ce080..f2131f45aa9b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -34,6 +34,12 @@ /* protect update critical side of if_list - but not the content */ static DEFINE_SPINLOCK(if_list_lock); + +static int batman_skb_recv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *ptype, + struct net_device *orig_dev); + static void hardif_free_rcu(struct rcu_head *rcu) { struct batman_if *batman_if; @@ -549,8 +555,9 @@ out: /* receive a packet with the batman ethertype coming on a hard * interface */ -int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev) +static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *ptype, + struct net_device *orig_dev) { struct bat_priv *bat_priv; struct batman_packet *batman_packet; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 30ec3b8db459..ad195438428a 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -35,10 +35,6 @@ struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev); int hardif_enable_interface(struct batman_if *batman_if, char *iface_name); void hardif_disable_interface(struct batman_if *batman_if); void hardif_remove_interfaces(void); -int batman_skb_recv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *ptype, - struct net_device *orig_dev); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 26e623eb9def..fa2693973ab8 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 09216ade16f1..eae24402fd0a 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * @@ -49,11 +49,6 @@ struct hashtable_t { /* allocates and clears the hash */ struct hashtable_t *hash_new(int size); -/* remove element if you already found the element you want to delete and don't - * need the overhead to find it again with hash_remove(). But usually, you - * don't want to use this function, as it fiddles with hash-internals. */ -void *hash_remove_element(struct hashtable_t *hash, struct element_t *elem); - /* free only the hashtable and the hash itself. */ void hash_destroy(struct hashtable_t *hash); diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index ecf6d7ffab2e..5e86d6f0c0fb 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index bf9b348cde27..08b185959501 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index b827f6a158cb..dc9248d9ea5f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 65106fb61b8f..e235d7bbe045 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -22,9 +22,6 @@ #ifndef _NET_BATMAN_ADV_MAIN_H_ #define _NET_BATMAN_ADV_MAIN_H_ -/* Kernel Programming */ -#define LINUX - #define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \ "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>" #define DRIVER_DESC "B.A.T.M.A.N. advanced" @@ -54,7 +51,6 @@ #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) -#define PACKBUFF_SIZE 2000 #define LOG_BUF_LEN 8192 /* has to be a power of 2 */ #define VIS_INTERVAL 5000 /* 5 seconds */ @@ -96,15 +92,11 @@ #define DBG_ROUTES 2 /* route or hna added / changed / deleted */ #define DBG_ALL 3 -#define LOG_BUF_LEN 8192 /* has to be a power of 2 */ - /* * Vis */ -/* #define VIS_SUBCLUSTERS_DISABLED */ - /* * Kernel headers */ @@ -158,13 +150,6 @@ static inline void bat_dbg(char type __always_unused, } #endif -#define bat_warning(net_dev, fmt, arg...) \ - do { \ - struct net_device *_netdev = (net_dev); \ - struct bat_priv *_batpriv = netdev_priv(_netdev); \ - bat_dbg(DBG_ALL, _batpriv, fmt, ## arg); \ - pr_warning("%s: " fmt, _netdev->name, ## arg); \ - } while (0) #define bat_info(net_dev, fmt, arg...) \ do { \ struct net_device *_netdev = (net_dev); \ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6b7fb6b7e6f9..54863c9385de 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -247,7 +247,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv, orig_node->hna_buff_len); /* update bonding candidates, we could have lost * some candidates. */ - update_bonding_candidates(bat_priv, orig_node); + update_bonding_candidates(orig_node); } } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index d474ceb2a4eb..8019fbddffd0 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 2284e8129cb2..e7571879af3f 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -50,6 +50,7 @@ /* fragmentation defines */ #define UNI_FRAG_HEAD 0x01 +#define UNI_FRAG_LARGETAIL 0x02 struct batman_packet { uint8_t packet_type; diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c index defd37c9be1f..5bb6a619afee 100644 --- a/net/batman-adv/ring_buffer.c +++ b/net/batman-adv/ring_buffer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h index 6b0cb9aaeba5..0395b2741864 100644 --- a/net/batman-adv/ring_buffer.h +++ b/net/batman-adv/ring_buffer.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 8828eddd3f72..028f73967b00 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -433,8 +433,7 @@ static char count_real_packets(struct ethhdr *ethhdr, } /* copy primary address for bonding */ -static void mark_bonding_address(struct bat_priv *bat_priv, - struct orig_node *orig_node, +static void mark_bonding_address(struct orig_node *orig_node, struct orig_node *orig_neigh_node, struct batman_packet *batman_packet) @@ -447,8 +446,7 @@ static void mark_bonding_address(struct bat_priv *bat_priv, } /* mark possible bond.candidates in the neighbor list */ -void update_bonding_candidates(struct bat_priv *bat_priv, - struct orig_node *orig_node) +void update_bonding_candidates(struct orig_node *orig_node) { int candidates; int interference_candidate; @@ -730,9 +728,8 @@ void receive_bat_packet(struct ethhdr *ethhdr, update_orig(bat_priv, orig_node, ethhdr, batman_packet, if_incoming, hna_buff, hna_buff_len, is_duplicate); - mark_bonding_address(bat_priv, orig_node, - orig_neigh_node, batman_packet); - update_bonding_candidates(bat_priv, orig_node); + mark_bonding_address(orig_node, orig_neigh_node, batman_packet); + update_bonding_candidates(orig_node); /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { @@ -810,13 +807,11 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, { struct orig_node *orig_node; struct icmp_packet_rr *icmp_packet; - struct ethhdr *ethhdr; struct batman_if *batman_if; int ret; uint8_t dstaddr[ETH_ALEN]; icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); /* add data to device queue */ if (icmp_packet->msg_type != ECHO_REQUEST) { @@ -848,7 +843,6 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, return NET_RX_DROP; icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); memcpy(icmp_packet->orig, @@ -866,17 +860,15 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, } static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, - struct sk_buff *skb, size_t icmp_len) + struct sk_buff *skb) { struct orig_node *orig_node; struct icmp_packet *icmp_packet; - struct ethhdr *ethhdr; struct batman_if *batman_if; int ret; uint8_t dstaddr[ETH_ALEN]; icmp_packet = (struct icmp_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); /* send TTL exceeded if packet is an echo request (traceroute) */ if (icmp_packet->msg_type != ECHO_REQUEST) { @@ -909,7 +901,6 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, return NET_RX_DROP; icmp_packet = (struct icmp_packet *) skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); memcpy(icmp_packet->orig, @@ -978,7 +969,7 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) /* TTL exceeded */ if (icmp_packet->ttl < 2) - return recv_icmp_ttl_exceeded(bat_priv, skb, hdr_size); + return recv_icmp_ttl_exceeded(bat_priv, skb); ret = NET_RX_DROP; @@ -1001,7 +992,6 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) return NET_RX_DROP; icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); /* decrement ttl */ icmp_packet->ttl--; @@ -1193,7 +1183,7 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, dstaddr); if (unicast_packet->packet_type == BAT_UNICAST_FRAG && - 2 * skb->len - hdr_size <= batman_if->net_dev->mtu) { + frag_can_reassemble(skb, batman_if->net_dev->mtu)) { ret = frag_reassemble_skb(skb, bat_priv, &new_skb); diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index f108f230bfdb..ceeca6f6ad16 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -42,7 +42,6 @@ int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if); int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, struct batman_if *recv_if); -void update_bonding_candidates(struct bat_priv *bat_priv, - struct orig_node *orig_node); +void update_bonding_candidates(struct orig_node *orig_node); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index b89b9f7709ae..7cc620e8aa1e 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -49,7 +49,7 @@ static unsigned long own_send_time(struct bat_priv *bat_priv) } /* when do we schedule a forwarded packet to be sent */ -static unsigned long forward_send_time(struct bat_priv *bat_priv) +static unsigned long forward_send_time(void) { return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); } @@ -356,7 +356,7 @@ void schedule_forward_packet(struct orig_node *orig_node, else batman_packet->flags &= ~DIRECTLINK; - send_time = forward_send_time(bat_priv); + send_time = forward_send_time(); add_bat_packet_to_list(bat_priv, (unsigned char *)batman_packet, sizeof(struct batman_packet) + hna_buff_len, diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index c4cefa8e4f85..bc53adede58d 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e89ede192ed0..145e0f782923 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 02b77334d10d..e7b0e1a34a55 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a633b5a435e2..f6917dde42ce 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 10c4c5c319b6..a4f3a37fd6ed 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index bf3f6f5a12c4..7270405046e9 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index ee41fef04b21..bedf29425775 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Andreas Langer * @@ -50,12 +50,12 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, skb = tfp->skb; } + if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0) + goto err; + skb_pull(tmp_skb, sizeof(struct unicast_frag_packet)); - if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) { - /* free buffered skb, skb will be freed later */ - kfree_skb(tfp->skb); - return NULL; - } + if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) + goto err; /* move free entry to end */ tfp->skb = NULL; @@ -70,6 +70,11 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, unicast_packet->packet_type = BAT_UNICAST; return skb; + +err: + /* free buffered skb, skb will be freed later */ + kfree_skb(tfp->skb); + return NULL; } static void frag_create_entry(struct list_head *head, struct sk_buff *skb) @@ -224,7 +229,8 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, struct unicast_frag_packet *frag1, *frag2; int uc_hdr_len = sizeof(struct unicast_packet); int ucf_hdr_len = sizeof(struct unicast_frag_packet); - int data_len = skb->len; + int data_len = skb->len - uc_hdr_len; + int large_tail = 0; if (!bat_priv->primary_if) goto dropped; @@ -232,10 +238,11 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); if (!frag_skb) goto dropped; + skb_reserve(frag_skb, ucf_hdr_len); unicast_packet = (struct unicast_packet *) skb->data; memcpy(&tmp_uc, unicast_packet, uc_hdr_len); - skb_split(skb, frag_skb, data_len / 2); + skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len); if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 || my_skb_head_push(frag_skb, ucf_hdr_len) < 0) @@ -253,8 +260,11 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, memcpy(frag1->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); memcpy(frag2, frag1, sizeof(struct unicast_frag_packet)); - frag1->flags |= UNI_FRAG_HEAD; - frag2->flags &= ~UNI_FRAG_HEAD; + if (data_len & 1) + large_tail = UNI_FRAG_LARGETAIL; + + frag1->flags = UNI_FRAG_HEAD | large_tail; + frag2->flags = large_tail; frag1->seqno = htons((uint16_t)atomic_inc_return( &batman_if->frag_seqno)); diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index e32b7867a9a4..8897308281d4 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Andreas Langer * @@ -22,6 +22,8 @@ #ifndef _NET_BATMAN_ADV_UNICAST_H_ #define _NET_BATMAN_ADV_UNICAST_H_ +#include "packet.h" + #define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ #define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ @@ -32,4 +34,25 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv); int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, struct batman_if *batman_if, uint8_t dstaddr[]); +static inline int frag_can_reassemble(struct sk_buff *skb, int mtu) +{ + struct unicast_frag_packet *unicast_packet; + int uneven_correction = 0; + unsigned int merged_size; + + unicast_packet = (struct unicast_frag_packet *)skb->data; + + if (unicast_packet->flags & UNI_FRAG_LARGETAIL) { + if (unicast_packet->flags & UNI_FRAG_HEAD) + uneven_correction = 1; + else + uneven_correction = -1; + } + + merged_size = (skb->len - sizeof(struct unicast_frag_packet)) * 2; + merged_size += sizeof(struct unicast_packet) + uneven_correction; + + return merged_size <= mtu; +} + #endif /* _NET_BATMAN_ADV_UNICAST_H_ */ diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index cd4c4231fa48..7db9ad82cc00 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich * @@ -64,6 +64,7 @@ static void free_info(struct kref *ref) spin_unlock_bh(&bat_priv->vis_list_lock); kfree_skb(info->skb_packet); + kfree(info); } /* Compare two vis packets, used by the hashing algorithm */ @@ -268,10 +269,10 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) buff_pos += sprintf(buff + buff_pos, "%pM,", entry->addr); - for (i = 0; i < packet->entries; i++) + for (j = 0; j < packet->entries; j++) buff_pos += vis_data_read_entry( buff + buff_pos, - &entries[i], + &entries[j], entry->addr, entry->primary); @@ -444,7 +445,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, info); if (hash_added < 0) { /* did not work (for some reason) */ - kref_put(&old_info->refcount, free_info); + kref_put(&info->refcount, free_info); info = NULL; } @@ -815,7 +816,7 @@ static void send_vis_packets(struct work_struct *work) container_of(work, struct delayed_work, work); struct bat_priv *bat_priv = container_of(delayed_work, struct bat_priv, vis_work); - struct vis_info *info, *temp; + struct vis_info *info; spin_lock_bh(&bat_priv->vis_hash_lock); purge_vis_packets(bat_priv); @@ -825,8 +826,9 @@ static void send_vis_packets(struct work_struct *work) send_list_add(bat_priv, bat_priv->my_vis_info); } - list_for_each_entry_safe(info, temp, &bat_priv->vis_send_list, - send_list) { + while (!list_empty(&bat_priv->vis_send_list)) { + info = list_first_entry(&bat_priv->vis_send_list, + typeof(*info), send_list); kref_get(&info->refcount); spin_unlock_bh(&bat_priv->vis_hash_lock); diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h index 2c3b33089a9b..31b820d07f23 100644 --- a/net/batman-adv/vis.h +++ b/net/batman-adv/vis.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6b90a4191734..99cd8d9d891b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -379,14 +379,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 hci_conn_hold(acl); if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { - acl->sec_level = sec_level; + acl->sec_level = BT_SECURITY_LOW; + acl->pending_sec_level = sec_level; acl->auth_type = auth_type; hci_acl_connect(acl); - } else { - if (acl->sec_level < sec_level) - acl->sec_level = sec_level; - if (acl->auth_type < auth_type) - acl->auth_type = auth_type; } if (type == ACL_LINK) @@ -442,11 +438,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("conn %p", conn); + if (conn->pending_sec_level > sec_level) + sec_level = conn->pending_sec_level; + if (sec_level > conn->sec_level) - conn->sec_level = sec_level; + conn->pending_sec_level = sec_level; else if (conn->link_mode & HCI_LM_AUTH) return 1; + /* Make sure we preserve an existing MITM requirement*/ + auth_type |= (conn->auth_type & 0x01); + conn->auth_type = auth_type; if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8b602d881fd7..9c4541bc488a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1011,6 +1011,10 @@ int hci_unregister_dev(struct hci_dev *hdev) destroy_workqueue(hdev->workqueue); + hci_dev_lock_bh(hdev); + hci_blacklist_clear(hdev); + hci_dev_unlock_bh(hdev); + __hci_dev_put(hdev); return 0; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 38100170d380..a290854fdaa6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -692,13 +692,13 @@ static int hci_outgoing_auth_needed(struct hci_dev *hdev, if (conn->state != BT_CONFIG || !conn->out) return 0; - if (conn->sec_level == BT_SECURITY_SDP) + if (conn->pending_sec_level == BT_SECURITY_SDP) return 0; /* Only request authentication for SSP connections or non-SSP * devices with sec_level HIGH */ if (!(hdev->ssp_mode > 0 && conn->ssp_mode > 0) && - conn->sec_level != BT_SECURITY_HIGH) + conn->pending_sec_level != BT_SECURITY_HIGH) return 0; return 1; @@ -1095,9 +1095,10 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); if (conn) { - if (!ev->status) + if (!ev->status) { conn->link_mode |= HCI_LM_AUTH; - else + conn->sec_level = conn->pending_sec_level; + } else conn->sec_level = BT_SECURITY_LOW; clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index c791fcda7b2d..7550abb0c96a 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -305,33 +305,44 @@ static void l2cap_chan_del(struct sock *sk, int err) } } -/* Service level security */ -static inline int l2cap_check_security(struct sock *sk) +static inline u8 l2cap_get_auth_type(struct sock *sk) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - __u8 auth_type; + if (sk->sk_type == SOCK_RAW) { + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: + return HCI_AT_DEDICATED_BONDING_MITM; + case BT_SECURITY_MEDIUM: + return HCI_AT_DEDICATED_BONDING; + default: + return HCI_AT_NO_BONDING; + } + } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) - auth_type = HCI_AT_NO_BONDING_MITM; + return HCI_AT_NO_BONDING_MITM; else - auth_type = HCI_AT_NO_BONDING; - - if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; + return HCI_AT_NO_BONDING; } else { switch (l2cap_pi(sk)->sec_level) { case BT_SECURITY_HIGH: - auth_type = HCI_AT_GENERAL_BONDING_MITM; - break; + return HCI_AT_GENERAL_BONDING_MITM; case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_GENERAL_BONDING; - break; + return HCI_AT_GENERAL_BONDING; default: - auth_type = HCI_AT_NO_BONDING; - break; + return HCI_AT_NO_BONDING; } } +} + +/* Service level security */ +static inline int l2cap_check_security(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + __u8 auth_type; + + auth_type = l2cap_get_auth_type(sk); return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level, auth_type); @@ -1068,39 +1079,7 @@ static int l2cap_do_connect(struct sock *sk) err = -ENOMEM; - if (sk->sk_type == SOCK_RAW) { - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - auth_type = HCI_AT_DEDICATED_BONDING_MITM; - break; - case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_DEDICATED_BONDING; - break; - default: - auth_type = HCI_AT_NO_BONDING; - break; - } - } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { - if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) - auth_type = HCI_AT_NO_BONDING_MITM; - else - auth_type = HCI_AT_NO_BONDING; - - if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - } else { - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - auth_type = HCI_AT_GENERAL_BONDING_MITM; - break; - case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_GENERAL_BONDING; - break; - default: - auth_type = HCI_AT_NO_BONDING; - break; - } - } + auth_type = l2cap_get_auth_type(sk); hcon = hci_connect(hdev, ACL_LINK, dst, l2cap_pi(sk)->sec_level, auth_type); @@ -1127,7 +1106,8 @@ static int l2cap_do_connect(struct sock *sk) if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM) { l2cap_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; + if (l2cap_check_security(sk)) + sk->sk_state = BT_CONNECTED; } else l2cap_do_start(sk); } @@ -1893,8 +1873,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms if (pi->mode == L2CAP_MODE_STREAMING) { l2cap_streaming_send(sk); } else { - if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && - pi->conn_state && L2CAP_CONN_WAIT_F) { + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->conn_state & L2CAP_CONN_WAIT_F)) { err = len; break; } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ff8aaa736650..6b83776534fb 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1164,7 +1164,8 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) * initiator rfcomm_process_rx already calls * rfcomm_session_put() */ if (s->sock->sk->sk_state != BT_CLOSED) - rfcomm_session_put(s); + if (list_empty(&s->dlcs)) + rfcomm_session_put(s); break; } } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 2872393b2939..88485cc74dc3 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -328,12 +328,12 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); - hlist_add_head_rcu(&fdb->hlist, head); - fdb->dst = source; fdb->is_local = is_local; fdb->is_static = is_local; fdb->ageing_timer = jiffies; + + hlist_add_head_rcu(&fdb->hlist, head); } return fdb; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index d9d1e2bac1d6..2a6801d8b728 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -365,7 +365,7 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features, mask; + u32 features, mask; features = mask = br->feature_mask; if (list_empty(&br->port_list)) @@ -379,7 +379,7 @@ void br_features_recompute(struct net_bridge *br) } done: - br->dev->features = netdev_fix_features(features, NULL); + br->dev->features = netdev_fix_features(br->dev, features); } /* called with RTNL */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 84aac7734bfc..9f22898c5359 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -182,7 +182,7 @@ struct net_bridge struct br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; - unsigned long feature_mask; + u32 feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; bool nf_call_iptables; diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index fa9dab372b68..6008d6dc18a0 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -394,9 +394,7 @@ static void ipcaif_net_setup(struct net_device *dev) priv->conn_req.sockaddr.u.dgm.connection_id = -1; priv->flowenabled = false; - ASSERT_RTNL(); init_waitqueue_head(&priv->netmgmt_wq); - list_add(&priv->list_field, &chnl_net_list); } @@ -453,6 +451,8 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, ret = register_netdevice(dev); if (ret) pr_warn("device rtml registration failed\n"); + else + list_add(&caifdev->list_field, &chnl_net_list); return ret; } diff --git a/net/core/dev.c b/net/core/dev.c index 8b1d886ed23b..6392ea0a5910 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,7 @@ #include <trace/events/skb.h> #include <linux/pci.h> #include <linux/inetdevice.h> +#include <linux/cpu_rmap.h> #include "net-sysfs.h" @@ -749,7 +750,8 @@ EXPORT_SYMBOL(dev_get_by_index); * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold RCU + * is not found or a pointer to the device. + * The caller must hold RCU or RTNL. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * @@ -1285,7 +1287,7 @@ static int __dev_close(struct net_device *dev) return __dev_close_many(&single); } -int dev_close_many(struct list_head *head) +static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; LIST_HEAD(tmp_list); @@ -1605,7 +1607,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * expected that drivers will fix this mapping if they can before * calling netif_set_real_num_tx_queues. */ -void netif_setup_tc(struct net_device *dev, unsigned int txq) +static void netif_setup_tc(struct net_device *dev, unsigned int txq) { int i; struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; @@ -1856,7 +1858,7 @@ EXPORT_SYMBOL(skb_checksum_help); * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; @@ -2044,9 +2046,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) protocol == htons(ETH_P_FCOE))); } -static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) { - if (!can_checksum_protocol(protocol, features)) { + if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { @@ -2056,10 +2058,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features return features; } -int netif_skb_features(struct sk_buff *skb) +u32 netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - int features = skb->dev->features; + u32 features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2104,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - int features; + u32 features; /* * If device doesnt need skb->dst, release it right now while @@ -2325,15 +2327,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); - bool contended = qdisc_is_running(q); + bool contended; int rc; + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. * This permits __QDISC_STATE_RUNNING owner to get the lock more often * and dequeue packets faster. */ + contended = qdisc_is_running(q); if (unlikely(contended)) spin_lock(&q->busylock); @@ -2351,7 +2356,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - qdisc_skb_cb(skb)->pkt_len = skb->len; qdisc_bstats_update(q, skb); if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { @@ -2366,7 +2370,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); - rc = qdisc_enqueue_root(skb, q); + rc = q->enqueue(skb, q) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); @@ -2585,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +static struct rps_dev_flow * +set_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow *rflow, u16 next_cpu) +{ + u16 tcpu; + + tcpu = rflow->cpu = next_cpu; + if (tcpu != RPS_NO_CPU) { +#ifdef CONFIG_RFS_ACCEL + struct netdev_rx_queue *rxqueue; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *old_rflow; + u32 flow_id; + u16 rxq_index; + int rc; + + /* Should we steer this flow to a different hardware queue? */ + if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap) + goto out; + rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); + if (rxq_index == skb_get_rx_queue(skb)) + goto out; + + rxqueue = dev->_rx + rxq_index; + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (!flow_table) + goto out; + flow_id = skb->rxhash & flow_table->mask; + rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, + rxq_index, flow_id); + if (rc < 0) + goto out; + old_rflow = rflow; + rflow = &flow_table->flows[flow_id]; + rflow->cpu = next_cpu; + rflow->filter = rc; + if (old_rflow->filter == rflow->filter) + old_rflow->filter = RPS_NO_FILTER; + out: +#endif + rflow->last_qtail = + per_cpu(softnet_data, tcpu).input_queue_head; + } + + return rflow; +} + /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. @@ -2615,7 +2666,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { - if (map->len == 1) { + if (map->len == 1 && + !rcu_dereference_raw(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; @@ -2655,12 +2707,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (unlikely(tcpu != next_cpu) && (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - - rflow->last_qtail)) >= 0)) { - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) - rflow->last_qtail = per_cpu(softnet_data, - tcpu).input_queue_head; - } + rflow->last_qtail)) >= 0)) + rflow = set_rps_cpu(dev, skb, rflow, next_cpu); + if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; @@ -2681,6 +2730,46 @@ done: return cpu; } +#ifdef CONFIG_RFS_ACCEL + +/** + * rps_may_expire_flow - check whether an RFS hardware filter may be removed + * @dev: Device on which the filter was set + * @rxq_index: RX queue index + * @flow_id: Flow ID passed to ndo_rx_flow_steer() + * @filter_id: Filter ID returned by ndo_rx_flow_steer() + * + * Drivers that implement ndo_rx_flow_steer() should periodically call + * this function for each installed filter and remove the filters for + * which it returns %true. + */ +bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, + u32 flow_id, u16 filter_id) +{ + struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *rflow; + bool expire = true; + int cpu; + + rcu_read_lock(); + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (flow_table && flow_id <= flow_table->mask) { + rflow = &flow_table->flows[flow_id]; + cpu = ACCESS_ONCE(rflow->cpu); + if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + ((int)(per_cpu(softnet_data, cpu).input_queue_head - + rflow->last_qtail) < + (int)(10 * flow_table->mask))) + expire = false; + } + rcu_read_unlock(); + return expire; +} +EXPORT_SYMBOL(rps_may_expire_flow); + +#endif /* CONFIG_RFS_ACCEL */ + /* Called from hardirq (IPI) context */ static void rps_trigger_softirq(void *data) { @@ -3476,6 +3565,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; + skb->dev = napi->dev; + skb->skb_iif = 0; napi->skb = skb; } @@ -3963,12 +4054,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = (v == SEQ_START_TOKEN) ? - first_net_device(seq_file_net(seq)) : - next_net_device((struct net_device *)v); + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + dev = first_net_device_rcu(seq_file_net(seq)); + else + dev = next_net_device_rcu(dev); ++*pos; - return rcu_dereference(dev); + return dev; } void dev_seq_stop(struct seq_file *seq, void *v) @@ -5125,41 +5219,49 @@ static void rollback_registered(struct net_device *dev) rollback_registered_many(&single); } -unsigned long netdev_fix_features(unsigned long features, const char *name) +u32 netdev_fix_features(struct net_device *dev, u32 features) { + /* Fix illegal checksum combinations */ + if ((features & NETIF_F_HW_CSUM) && + (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + netdev_info(dev, "mixed HW and IP checksum settings.\n"); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((features & NETIF_F_NO_CSUM) && + (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + netdev_info(dev, "mixed no checksumming and other settings.\n"); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " - "checksum feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_SG since no checksum feature.\n"); features &= ~NETIF_F_SG; } /* TSO requires that SG is present as well. */ if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " - "SG feature.\n", name); + netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n"); features &= ~NETIF_F_TSO; } + /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ if (!((features & NETIF_F_GEN_CSUM) || (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no checksum offload features.\n", - name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; } if (!(features & NETIF_F_SG)) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_SG feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); features &= ~NETIF_F_UFO; } } @@ -5302,22 +5404,7 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Fix illegal checksum combinations */ - if ((dev->features & NETIF_F_HW_CSUM) && - (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); - } - - if ((dev->features & NETIF_F_NO_CSUM) && - (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); - } - - dev->features = netdev_fix_features(dev->features, dev->name); + dev->features = netdev_fix_features(dev, dev->features); /* Enable software GSO if SG is supported. */ if (dev->features & NETIF_F_SG) @@ -5720,31 +5807,36 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); + dev->gso_max_size = GSO_MAX_SIZE; + + INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); + dev->ethtool_ntuple_list.count = 0; + INIT_LIST_HEAD(&dev->napi_list); + INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->link_watch_list); + dev->priv_flags = IFF_XMIT_DST_RELEASE; + setup(dev); + dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) - goto free_pcpu; + goto free_all; #ifdef CONFIG_RPS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) - goto free_pcpu; + goto free_all; #endif - dev->gso_max_size = GSO_MAX_SIZE; - - INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); - dev->ethtool_ntuple_list.count = 0; - INIT_LIST_HEAD(&dev->napi_list); - INIT_LIST_HEAD(&dev->unreg_list); - INIT_LIST_HEAD(&dev->link_watch_list); - dev->priv_flags = IFF_XMIT_DST_RELEASE; - setup(dev); strcpy(dev->name, name); dev->group = INIT_NETDEV_GROUP; return dev; +free_all: + free_netdev(dev); + return NULL; + free_pcpu: free_percpu(dev->pcpu_refcnt); kfree(dev->_tx); @@ -6053,8 +6145,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, * @one to the master device with current feature set @all. Will not * enable anything that is off in @mask. Returns the new feature set. */ -unsigned long netdev_increment_features(unsigned long all, unsigned long one, - unsigned long mask) +u32 netdev_increment_features(u32 all, u32 one, u32 mask) { /* If device needs checksumming, downgrade to it. */ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) diff --git a/net/core/dst.c b/net/core/dst.c index b99c7c7ffce2..c1674fde827d 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -164,6 +164,8 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); +const u32 dst_default_metrics[RTAX_MAX]; + void *dst_alloc(struct dst_ops *ops) { struct dst_entry *dst; @@ -180,6 +182,7 @@ void *dst_alloc(struct dst_ops *ops) dst->lastuse = jiffies; dst->path = dst; dst->input = dst->output = dst_discard; + dst_init_metrics(dst, dst_default_metrics, true); #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif @@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst) } EXPORT_SYMBOL(dst_release); +u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + + if (p) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + kfree(p); + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } + } + return p; +} +EXPORT_SYMBOL(dst_cow_metrics_generic); + +/* Caller asserts that dst_metrics_read_only(dst) is false. */ +void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + unsigned long prev, new; + + new = (unsigned long) dst_default_metrics; + prev = cmpxchg(&dst->_metrics, old, new); + if (prev == old) + kfree(__DST_METRICS_PTR(old)); +} +EXPORT_SYMBOL(__dst_destroy_metrics_generic); + /** * skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 17741782a345..5984ee0c7136 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -817,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (regs.len > reglen) regs.len = reglen; - regbuf = vmalloc(reglen); + regbuf = vzalloc(reglen); if (!regbuf) return -ENOMEM; @@ -1458,7 +1458,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; - unsigned long old_features; + u32 old_features; if (!dev || !netif_device_present(dev)) return -ENODEV; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 60a902913429..799f06e03a22 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) { size_t size = entries * sizeof(struct neighbour *); struct neigh_hash_table *ret; - struct neighbour **buckets; + struct neighbour __rcu **buckets; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) @@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) if (size <= PAGE_SIZE) buckets = kzalloc(size, GFP_ATOMIC); else - buckets = (struct neighbour **) + buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); if (!buckets) { kfree(ret); return NULL; } - rcu_assign_pointer(ret->hash_buckets, buckets); + ret->hash_buckets = buckets; ret->hash_mask = entries - 1; get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); return ret; @@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table, rcu); size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); - struct neighbour **buckets = nht->hash_buckets; + struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) kfree(buckets); @@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour proc dir entry"); #endif - tbl->nht = neigh_hash_alloc(8); + RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); @@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl) } write_unlock(&neigh_tbl_lock); - call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); + call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, + neigh_hash_free_rcu); tbl->nht = NULL; kfree(tbl->phash_buckets); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e23c01be5a5b..2e4a393dfc3b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); -NETDEVICE_SHOW(features, fmt_long_hex); +NETDEVICE_SHOW(features, fmt_hex); NETDEVICE_SHOW(type, fmt_dec); NETDEVICE_SHOW(link_mode, fmt_dec); @@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev, return ret; } +NETDEVICE_SHOW(group, fmt_dec); + +static int change_group(struct net_device *net, unsigned long new_group) +{ + dev_set_group(net, (int) new_group); + return 0; +} + +static ssize_t store_group(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_group); +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), @@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), + __ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group), {} }; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a9e7fc4c461f..d73b77adb676 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -251,6 +251,7 @@ struct pktgen_dev { int max_pkt_size; /* = ETH_ZLEN; */ int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; + struct page *page; u64 delay; /* nano-seconds */ __u64 count; /* Default No packets to send */ @@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file, if (node_possible(value)) { pkt_dev->node = value; sprintf(pg_result, "OK: node=%d", pkt_dev->node); + if (pkt_dev->page) { + put_page(pkt_dev->page); + pkt_dev->page = NULL; + } } else sprintf(pg_result, "ERROR: node not possible"); @@ -2605,6 +2610,90 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, return htons(id | (cfi << 12) | (prio << 13)); } +static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, + int datalen) +{ + struct timeval timestamp; + struct pktgen_hdr *pgh; + + pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh)); + datalen -= sizeof(*pgh); + + if (pkt_dev->nfrags <= 0) { + pgh = (struct pktgen_hdr *)skb_put(skb, datalen); + memset(pgh + 1, 0, datalen); + } else { + int frags = pkt_dev->nfrags; + int i, len; + + + if (frags > MAX_SKB_FRAGS) + frags = MAX_SKB_FRAGS; + len = datalen - frags * PAGE_SIZE; + if (len > 0) { + memset(skb_put(skb, len), 0, len); + datalen = frags * PAGE_SIZE; + } + + i = 0; + while (datalen > 0) { + if (unlikely(!pkt_dev->page)) { + int node = numa_node_id(); + + if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE)) + node = pkt_dev->node; + pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!pkt_dev->page) + break; + } + skb_shinfo(skb)->frags[i].page = pkt_dev->page; + get_page(pkt_dev->page); + skb_shinfo(skb)->frags[i].page_offset = 0; + skb_shinfo(skb)->frags[i].size = + (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + datalen -= skb_shinfo(skb)->frags[i].size; + skb->len += skb_shinfo(skb)->frags[i].size; + skb->data_len += skb_shinfo(skb)->frags[i].size; + i++; + skb_shinfo(skb)->nr_frags = i; + } + + while (i < frags) { + int rem; + + if (i == 0) + break; + + rem = skb_shinfo(skb)->frags[i - 1].size / 2; + if (rem == 0) + break; + + skb_shinfo(skb)->frags[i - 1].size -= rem; + + skb_shinfo(skb)->frags[i] = + skb_shinfo(skb)->frags[i - 1]; + get_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb)->frags[i].page = + skb_shinfo(skb)->frags[i - 1].page; + skb_shinfo(skb)->frags[i].page_offset += + skb_shinfo(skb)->frags[i - 1].size; + skb_shinfo(skb)->frags[i].size = rem; + i++; + skb_shinfo(skb)->nr_frags = i; + } + } + + /* Stamp the time, and sequence number, + * convert them to network byte order + */ + pgh->pgh_magic = htonl(PKTGEN_MAGIC); + pgh->seq_num = htonl(pkt_dev->seq_num); + + do_gettimeofday(×tamp); + pgh->tv_sec = htonl(timestamp.tv_sec); + pgh->tv_usec = htonl(timestamp.tv_usec); +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2613,7 +2702,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct udphdr *udph; int datalen, iplen; struct iphdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IP); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -2729,76 +2817,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; - - if (pkt_dev->nfrags <= 0) { - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); - } else { - int frags = pkt_dev->nfrags; - int i, len; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - len = datalen - frags * PAGE_SIZE; - memset(skb_put(skb, len), 0, len); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } + pktgen_finalize_skb(pkt_dev, skb, datalen); #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) @@ -2980,7 +2999,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct udphdr *udph; int datalen; struct ipv6hdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IPV6); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -3083,75 +3101,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { - int frags = pkt_dev->nfrags; - int i; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - skb_put(skb, datalen - frags * PAGE_SIZE); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - * should we update cloned packets too ? - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } - /* pkt_dev->seq_num++; FF: you really mean this? */ + pktgen_finalize_skb(pkt_dev, skb, datalen); return skb; } @@ -3884,6 +3834,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); + if (pkt_dev->page) + put_page(pkt_dev->page); kfree(pkt_dev); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a0b2eeb3b610..da0fe457c858 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1122,8 +1122,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EOPNOTSUPP; if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, - tb[IFLA_AF_SPEC]); + err = af_ops->validate_link_af(dev, af); if (err < 0) return err; } @@ -1548,6 +1547,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + if (tb[IFLA_GROUP]) + dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); return dev; @@ -1606,10 +1607,6 @@ replay: else { if (ifname[0]) dev = __dev_get_by_name(net, ifname); - else if (tb[IFLA_GROUP]) - return rtnl_group_changelink(net, - nla_get_u32(tb[IFLA_GROUP]), - ifm, tb); else dev = NULL; } @@ -1676,8 +1673,13 @@ replay: return do_setlink(dev, ifm, tb, ifname, modified); } - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) + return rtnl_group_changelink(net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, tb); return -ENODEV; + } if (ifm->ifi_index) return -EOPNOTSUPP; @@ -1702,6 +1704,9 @@ replay: snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); dest_net = rtnl_link_get_net(net, tb); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); if (IS_ERR(dev)) @@ -1850,7 +1855,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d31bb36ae0dc..14cf560b4a3e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -210,6 +210,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); + kmemcheck_annotate_variable(shinfo->destructor_arg); if (fclone) { struct sk_buff *child = skb + 1; @@ -2497,7 +2498,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; @@ -2507,7 +2508,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) unsigned int offset = doffset; unsigned int headroom; unsigned int len; - int sg = features & NETIF_F_SG; + int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; @@ -2744,8 +2745,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) merge: if (offset > headlen) { - skbinfo->frags[0].page_offset += offset - headlen; - skbinfo->frags[0].size -= offset - headlen; + unsigned int eat = offset - headlen; + + skbinfo->frags[0].page_offset += eat; + skbinfo->frags[0].size -= eat; + skb->data_len -= eat; + skb->len -= eat; offset = headlen; } diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d900ab99814a..6b03f561caec 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -583,7 +583,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, u8 up, idtype; int ret = -EINVAL; - if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->getapp) + if (!tb[DCB_ATTR_APP]) goto out; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], @@ -604,7 +604,16 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, goto out; id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]); - up = netdev->dcbnl_ops->getapp(netdev, idtype, id); + + if (netdev->dcbnl_ops->getapp) { + up = netdev->dcbnl_ops->getapp(netdev, idtype, id); + } else { + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + up = dcb_getapp(netdev, &app); + } /* send this back */ dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e96d5e810039..fadecd20d75b 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -583,6 +583,15 @@ done: dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); +} + static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hc = ccid_priv(ccid); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5e636365d33c..42c9c62d3417 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); +static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); @@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = { .check = dn_dst_check, .default_advmss = dn_dst_default_advmss, .default_mtu = dn_dst_default_mtu, + .cow_metrics = dst_cow_metrics_generic, + .destroy = dn_dst_destroy, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, }; +static void dn_dst_destroy(struct dst_entry *dst) +{ + dst_destroy_metrics_generic(dst); +} + static __inline__ unsigned dn_hash(__le16 src, __le16 dst) { __u16 tmp = (__u16 __force)(src ^ dst); @@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; struct net_device *dev = rt->dst.dev; + unsigned int mss_metric; struct neighbour *n; - unsigned int metric; if (fi) { if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - dst_import_metrics(&rt->dst, fi->fib_metrics); + dst_init_metrics(&rt->dst, fi->fib_metrics, true); } rt->rt_type = res->type; @@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); - metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); - if (metric) { + mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); + if (mss_metric) { unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); - if (metric > mss) + if (mss_metric > mss) dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); } return 0; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0c877a74e1f4..3fb14b7c13cf 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -428,7 +428,7 @@ static void __exit dsa_cleanup_module(void) } module_exit(dsa_cleanup_module); -MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>") +MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>"); MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:dsa"); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 15dcc1a586b4..0c2826337919 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -265,13 +265,13 @@ static void ec_tx_done(struct sk_buff *skb, int result) static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock *sk = sock->sk; struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name; struct net_device *dev; struct ec_addr addr; int err; unsigned char port, cb; #if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE) + struct sock *sk = sock->sk; struct sk_buff *skb; struct ec_cb *eb; #endif @@ -488,10 +488,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, error_free_buf: vfree(userbuf); +error: #else err = -EPROTOTYPE; #endif - error: mutex_unlock(&econet_mutex); return err; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8949a05ac307..cbb505ba9324 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER If unsure, say N here. -choice - prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" - depends on IP_ADVANCED_ROUTER - default ASK_IP_FIB_HASH - -config ASK_IP_FIB_HASH - bool "FIB_HASH" - ---help--- - Current FIB is very proven and good enough for most users. - -config IP_FIB_TRIE - bool "FIB_TRIE" - ---help--- - Use new experimental LC-trie as FIB lookup algorithm. - This improves lookup performance if you have a large - number of routes. - - LC-trie is a longest matching prefix lookup algorithm which - performs better than FIB_HASH for large routing tables. - But, it consumes more memory and is more complex. - - LC-trie is described in: - - IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson - IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, - June 1999 - - An experimental study of compression methods for dynamic tries - Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. - <http://www.csc.kth.se/~snilsson/software/dyntrie2/> - -endchoice - -config IP_FIB_HASH - def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER - config IP_FIB_TRIE_STATS bool "FIB TRIE statistics" - depends on IP_FIB_TRIE + depends on IP_ADVANCED_ROUTER ---help--- Keep track of statistics on structure of FIB TRIE table. Useful for testing and measuring TRIE performance. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4978d22f9a75..0dc772d0d125 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,12 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ - fib_frontend.o fib_semantics.o \ + fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o -obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o -obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2b61107df6c..7ceb80447631 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -880,6 +880,19 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(inet_ioctl); +#ifdef CONFIG_COMPAT +int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + int err = -ENOIOCTLCMD; + + if (sk->sk_prot->compat_ioctl) + err = sk->sk_prot->compat_ioctl(sk, cmd, arg); + + return err; +} +#endif + const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, @@ -903,6 +916,7 @@ const struct proto_ops inet_stream_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_stream_ops); @@ -929,6 +943,7 @@ const struct proto_ops inet_dgram_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_dgram_ops); @@ -959,6 +974,7 @@ static const struct proto_ops inet_sockraw_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; @@ -1215,7 +1231,7 @@ out: return err; } -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct iphdr *iph; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 86961bec70ab..325053df6e70 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -201,7 +201,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ttl = 0; top_iph->check = 0; - ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; + if (x->props.flags & XFRM_STATE_ALIGN4) + ah->hdrlen = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; + else + ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; @@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; - if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && - ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) - goto out; + if (x->props.flags & XFRM_STATE_ALIGN4) { + if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len)) + goto out; + } else { + if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) + goto out; + } if (!pskb_may_pull(skb, ah_hlen)) goto out; @@ -450,8 +459,12 @@ static int ah_init_state(struct xfrm_state *x) BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); - x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + - ahp->icv_trunc_len); + if (x->props.flags & XFRM_STATE_ALIGN4) + x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); + else + x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 04c8b69fd426..7927589813b5 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1017,14 +1017,13 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; return 0; } - if (__in_dev_get_rcu(dev)) { - IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on); + if (__in_dev_get_rtnl(dev)) { + IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); return 0; } return -ENXIO; } -/* must be called with rcu_read_lock() */ static int arp_req_set_public(struct net *net, struct arpreq *r, struct net_device *dev) { @@ -1233,10 +1232,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!(r.arp_flags & ATF_NETMASK)) ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = htonl(0xFFFFFFFFUL); - rcu_read_lock(); + rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; - dev = dev_get_by_name_rcu(net, r.arp_dev); + dev = __dev_get_by_name(net, r.arp_dev); if (dev == NULL) goto out; @@ -1263,7 +1262,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; } out: - rcu_read_unlock(); + rtnl_unlock(); if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) err = -EFAULT; return err; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1d2cdd43a878..2a49c061b34c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net) { struct fib_table *local_table, *main_table; - local_table = fib_hash_table(RT_TABLE_LOCAL); + local_table = fib_trie_table(RT_TABLE_LOCAL); if (local_table == NULL) return -ENOMEM; - main_table = fib_hash_table(RT_TABLE_MAIN); + main_table = fib_trie_table(RT_TABLE_MAIN); if (main_table == NULL) goto fail; @@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - tb = fib_hash_table(id); + tb = fib_trie_table(id); if (!tb) return NULL; h = id & (FIB_TABLE_HASHSZ - 1); @@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ -void fib_select_default(struct net *net, - const struct flowi *flp, struct fib_result *res) -{ - struct fib_table *tb; - int table = RT_TABLE_MAIN; -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) - return; - table = res->r->table; -#endif - tb = fib_get_table(net, table); - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - fib_table_select_default(tb, flp, res); -} - static void fib_flush(struct net *net) { int flushed = 0; @@ -1101,5 +1086,5 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - fib_hash_init(); + fib_trie_init(); } diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c deleted file mode 100644 index b3acb0417b21..000000000000 --- a/net/ipv4/fib_hash.c +++ /dev/null @@ -1,1133 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IPv4 FIB: lookup engine and maintenance routines. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/errno.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/inetdevice.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/netlink.h> -#include <linux/init.h> -#include <linux/slab.h> - -#include <net/net_namespace.h> -#include <net/ip.h> -#include <net/protocol.h> -#include <net/route.h> -#include <net/tcp.h> -#include <net/sock.h> -#include <net/ip_fib.h> - -#include "fib_lookup.h" - -static struct kmem_cache *fn_hash_kmem __read_mostly; -static struct kmem_cache *fn_alias_kmem __read_mostly; - -struct fib_node { - struct hlist_node fn_hash; - struct list_head fn_alias; - __be32 fn_key; - struct fib_alias fn_embedded_alias; -}; - -#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head)) - -struct fn_zone { - struct fn_zone __rcu *fz_next; /* Next not empty zone */ - struct hlist_head __rcu *fz_hash; /* Hash table pointer */ - seqlock_t fz_lock; - u32 fz_hashmask; /* (fz_divisor - 1) */ - - u8 fz_order; /* Zone order (0..32) */ - u8 fz_revorder; /* 32 - fz_order */ - __be32 fz_mask; /* inet_make_mask(order) */ -#define FZ_MASK(fz) ((fz)->fz_mask) - - struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE]; - - int fz_nent; /* Number of entries */ - int fz_divisor; /* Hash size (mask+1) */ -}; - -struct fn_hash { - struct fn_zone *fn_zones[33]; - struct fn_zone __rcu *fn_zone_list; -}; - -static inline u32 fn_hash(__be32 key, struct fn_zone *fz) -{ - u32 h = ntohl(key) >> fz->fz_revorder; - h ^= (h>>20); - h ^= (h>>10); - h ^= (h>>5); - h &= fz->fz_hashmask; - return h; -} - -static inline __be32 fz_key(__be32 dst, struct fn_zone *fz) -{ - return dst & FZ_MASK(fz); -} - -static unsigned int fib_hash_genid; - -#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head)) - -static struct hlist_head *fz_hash_alloc(int divisor) -{ - unsigned long size = divisor * sizeof(struct hlist_head); - - if (size <= PAGE_SIZE) - return kzalloc(size, GFP_KERNEL); - - return (struct hlist_head *) - __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size)); -} - -/* The fib hash lock must be held when this is called. */ -static inline void fn_rebuild_zone(struct fn_zone *fz, - struct hlist_head *old_ht, - int old_divisor) -{ - int i; - - for (i = 0; i < old_divisor; i++) { - struct hlist_node *node, *n; - struct fib_node *f; - - hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) { - struct hlist_head *new_head; - - hlist_del_rcu(&f->fn_hash); - - new_head = rcu_dereference_protected(fz->fz_hash, 1) + - fn_hash(f->fn_key, fz); - hlist_add_head_rcu(&f->fn_hash, new_head); - } - } -} - -static void fz_hash_free(struct hlist_head *hash, int divisor) -{ - unsigned long size = divisor * sizeof(struct hlist_head); - - if (size <= PAGE_SIZE) - kfree(hash); - else - free_pages((unsigned long)hash, get_order(size)); -} - -static void fn_rehash_zone(struct fn_zone *fz) -{ - struct hlist_head *ht, *old_ht; - int old_divisor, new_divisor; - u32 new_hashmask; - - new_divisor = old_divisor = fz->fz_divisor; - - switch (old_divisor) { - case EMBEDDED_HASH_SIZE: - new_divisor *= EMBEDDED_HASH_SIZE; - break; - case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE: - new_divisor *= (EMBEDDED_HASH_SIZE/2); - break; - default: - if ((old_divisor << 1) > FZ_MAX_DIVISOR) { - printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); - return; - } - new_divisor = (old_divisor << 1); - break; - } - - new_hashmask = (new_divisor - 1); - -#if RT_CACHE_DEBUG >= 2 - printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n", - fz->fz_order, old_divisor); -#endif - - ht = fz_hash_alloc(new_divisor); - - if (ht) { - struct fn_zone nfz; - - memcpy(&nfz, fz, sizeof(nfz)); - - write_seqlock_bh(&fz->fz_lock); - old_ht = rcu_dereference_protected(fz->fz_hash, 1); - RCU_INIT_POINTER(nfz.fz_hash, ht); - nfz.fz_hashmask = new_hashmask; - nfz.fz_divisor = new_divisor; - fn_rebuild_zone(&nfz, old_ht, old_divisor); - fib_hash_genid++; - rcu_assign_pointer(fz->fz_hash, ht); - fz->fz_hashmask = new_hashmask; - fz->fz_divisor = new_divisor; - write_sequnlock_bh(&fz->fz_lock); - - if (old_ht != fz->fz_embedded_hash) { - synchronize_rcu(); - fz_hash_free(old_ht, old_divisor); - } - } -} - -static void fn_free_node_rcu(struct rcu_head *head) -{ - struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu); - - kmem_cache_free(fn_hash_kmem, f); -} - -static inline void fn_free_node(struct fib_node *f) -{ - call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu); -} - -static void fn_free_alias_rcu(struct rcu_head *head) -{ - struct fib_alias *fa = container_of(head, struct fib_alias, rcu); - - kmem_cache_free(fn_alias_kmem, fa); -} - -static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f) -{ - fib_release_info(fa->fa_info); - if (fa == &f->fn_embedded_alias) - fa->fa_info = NULL; - else - call_rcu(&fa->rcu, fn_free_alias_rcu); -} - -static struct fn_zone * -fn_new_zone(struct fn_hash *table, int z) -{ - int i; - struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL); - if (!fz) - return NULL; - - seqlock_init(&fz->fz_lock); - fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1; - fz->fz_hashmask = fz->fz_divisor - 1; - RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash); - fz->fz_order = z; - fz->fz_revorder = 32 - z; - fz->fz_mask = inet_make_mask(z); - - /* Find the first not empty zone with more specific mask */ - for (i = z + 1; i <= 32; i++) - if (table->fn_zones[i]) - break; - if (i > 32) { - /* No more specific masks, we are the first. */ - rcu_assign_pointer(fz->fz_next, - rtnl_dereference(table->fn_zone_list)); - rcu_assign_pointer(table->fn_zone_list, fz); - } else { - rcu_assign_pointer(fz->fz_next, - rtnl_dereference(table->fn_zones[i]->fz_next)); - rcu_assign_pointer(table->fn_zones[i]->fz_next, fz); - } - table->fn_zones[z] = fz; - fib_hash_genid++; - return fz; -} - -int fib_table_lookup(struct fib_table *tb, - const struct flowi *flp, struct fib_result *res, - int fib_flags) -{ - int err; - struct fn_zone *fz; - struct fn_hash *t = (struct fn_hash *)tb->tb_data; - - rcu_read_lock(); - for (fz = rcu_dereference(t->fn_zone_list); - fz != NULL; - fz = rcu_dereference(fz->fz_next)) { - struct hlist_head *head; - struct hlist_node *node; - struct fib_node *f; - __be32 k; - unsigned int seq; - - do { - seq = read_seqbegin(&fz->fz_lock); - k = fz_key(flp->fl4_dst, fz); - - head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz); - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - if (f->fn_key != k) - continue; - - err = fib_semantic_match(&f->fn_alias, - flp, res, - fz->fz_order, fib_flags); - if (err <= 0) - goto out; - } - } while (read_seqretry(&fz->fz_lock, seq)); - } - err = 1; -out: - rcu_read_unlock(); - return err; -} - -void fib_table_select_default(struct fib_table *tb, - const struct flowi *flp, struct fib_result *res) -{ - int order, last_idx; - struct hlist_node *node; - struct fib_node *f; - struct fib_info *fi = NULL; - struct fib_info *last_resort; - struct fn_hash *t = (struct fn_hash *)tb->tb_data; - struct fn_zone *fz = t->fn_zones[0]; - struct hlist_head *head; - - if (fz == NULL) - return; - - last_idx = -1; - last_resort = NULL; - order = -1; - - rcu_read_lock(); - head = rcu_dereference(fz->fz_hash); - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { - struct fib_info *next_fi = fa->fa_info; - - if (fa->fa_scope != res->scope || - fa->fa_type != RTN_UNICAST) - continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; - if (!next_fi->fib_nh[0].nh_gw || - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) - continue; - - fib_alias_accessed(fa); - - if (fi == NULL) { - if (next_fi != res->fi) - break; - } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - fi = next_fi; - order++; - } - } - - if (order <= 0 || fi == NULL) { - tb->tb_default = -1; - goto out; - } - - if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - - if (last_idx >= 0) - fib_result_assign(res, last_resort); - tb->tb_default = last_idx; -out: - rcu_read_unlock(); -} - -/* Insert node F to FZ. */ -static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz); - - hlist_add_head_rcu(&f->fn_hash, head); -} - -/* Return the node in FZ matching KEY. */ -static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz); - struct hlist_node *node; - struct fib_node *f; - - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - if (f->fn_key == key) - return f; - } - - return NULL; -} - - -static struct fib_alias *fib_fast_alloc(struct fib_node *f) -{ - struct fib_alias *fa = &f->fn_embedded_alias; - - if (fa->fa_info != NULL) - fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); - return fa; -} - -/* Caller must hold RTNL. */ -int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fib_node *new_f = NULL; - struct fib_node *f; - struct fib_alias *fa, *new_fa; - struct fn_zone *fz; - struct fib_info *fi; - u8 tos = cfg->fc_tos; - __be32 key; - int err; - - if (cfg->fc_dst_len > 32) - return -EINVAL; - - fz = table->fn_zones[cfg->fc_dst_len]; - if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) - return -ENOBUFS; - - key = 0; - if (cfg->fc_dst) { - if (cfg->fc_dst & ~FZ_MASK(fz)) - return -EINVAL; - key = fz_key(cfg->fc_dst, fz); - } - - fi = fib_create_info(cfg); - if (IS_ERR(fi)) - return PTR_ERR(fi); - - if (fz->fz_nent > (fz->fz_divisor<<1) && - fz->fz_divisor < FZ_MAX_DIVISOR && - (cfg->fc_dst_len == 32 || - (1 << cfg->fc_dst_len) > fz->fz_divisor)) - fn_rehash_zone(fz); - - f = fib_find_node(fz, key); - - if (!f) - fa = NULL; - else - fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority); - - /* Now fa, if non-NULL, points to the first fib alias - * with the same keys [prefix,tos,priority], if such key already - * exists or to the node before which we will insert new one. - * - * If fa is NULL, we will need to allocate a new one and - * insert to the head of f. - * - * If f is NULL, no fib node matched the destination key - * and we need to allocate a new one of those as well. - */ - - if (fa && fa->fa_tos == tos && - fa->fa_info->fib_priority == fi->fib_priority) { - struct fib_alias *fa_first, *fa_match; - - err = -EEXIST; - if (cfg->fc_nlflags & NLM_F_EXCL) - goto out; - - /* We have 2 goals: - * 1. Find exact match for type, scope, fib_info to avoid - * duplicate routes - * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it - */ - fa_match = NULL; - fa_first = fa; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { - if (fa->fa_tos != tos) - break; - if (fa->fa_info->fib_priority != fi->fib_priority) - break; - if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && - fa->fa_info == fi) { - fa_match = fa; - break; - } - } - - if (cfg->fc_nlflags & NLM_F_REPLACE) { - u8 state; - - fa = fa_first; - if (fa_match) { - if (fa == fa_match) - err = 0; - goto out; - } - err = -ENOBUFS; - new_fa = fib_fast_alloc(f); - if (new_fa == NULL) - goto out; - - new_fa->fa_tos = fa->fa_tos; - new_fa->fa_info = fi; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; - state = fa->fa_state; - new_fa->fa_state = state & ~FA_S_ACCESSED; - fib_hash_genid++; - list_replace_rcu(&fa->fa_list, &new_fa->fa_list); - - fn_free_alias(fa, f); - if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, - tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); - return 0; - } - - /* Error if we find a perfect match which - * uses the same scope, type, and nexthop - * information. - */ - if (fa_match) - goto out; - - if (!(cfg->fc_nlflags & NLM_F_APPEND)) - fa = fa_first; - } - - err = -ENOENT; - if (!(cfg->fc_nlflags & NLM_F_CREATE)) - goto out; - - err = -ENOBUFS; - - if (!f) { - new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL); - if (new_f == NULL) - goto out; - - INIT_HLIST_NODE(&new_f->fn_hash); - INIT_LIST_HEAD(&new_f->fn_alias); - new_f->fn_key = key; - f = new_f; - } - - new_fa = fib_fast_alloc(f); - if (new_fa == NULL) - goto out; - - new_fa->fa_info = fi; - new_fa->fa_tos = tos; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; - new_fa->fa_state = 0; - - /* - * Insert new entry to the list. - */ - - if (new_f) - fib_insert_node(fz, new_f); - list_add_tail_rcu(&new_fa->fa_list, - (fa ? &fa->fa_list : &f->fn_alias)); - fib_hash_genid++; - - if (new_f) - fz->fz_nent++; - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - - rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, - &cfg->fc_nlinfo, 0); - return 0; - -out: - if (new_f) - kmem_cache_free(fn_hash_kmem, new_f); - fib_release_info(fi); - return err; -} - -int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) -{ - struct fn_hash *table = (struct fn_hash *)tb->tb_data; - struct fib_node *f; - struct fib_alias *fa, *fa_to_delete; - struct fn_zone *fz; - __be32 key; - - if (cfg->fc_dst_len > 32) - return -EINVAL; - - if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) - return -ESRCH; - - key = 0; - if (cfg->fc_dst) { - if (cfg->fc_dst & ~FZ_MASK(fz)) - return -EINVAL; - key = fz_key(cfg->fc_dst, fz); - } - - f = fib_find_node(fz, key); - - if (!f) - fa = NULL; - else - fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); - if (!fa) - return -ESRCH; - - fa_to_delete = NULL; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { - struct fib_info *fi = fa->fa_info; - - if (fa->fa_tos != cfg->fc_tos) - break; - - if ((!cfg->fc_type || - fa->fa_type == cfg->fc_type) && - (cfg->fc_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == cfg->fc_scope) && - (!cfg->fc_protocol || - fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi) == 0) { - fa_to_delete = fa; - break; - } - } - - if (fa_to_delete) { - int kill_fn; - - fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, - tb->tb_id, &cfg->fc_nlinfo, 0); - - kill_fn = 0; - list_del_rcu(&fa->fa_list); - if (list_empty(&f->fn_alias)) { - hlist_del_rcu(&f->fn_hash); - kill_fn = 1; - } - fib_hash_genid++; - - if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - fn_free_alias(fa, f); - if (kill_fn) { - fn_free_node(f); - fz->fz_nent--; - } - - return 0; - } - return -ESRCH; -} - -static int fn_flush_list(struct fn_zone *fz, int idx) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx; - struct hlist_node *node, *n; - struct fib_node *f; - int found = 0; - - hlist_for_each_entry_safe(f, node, n, head, fn_hash) { - struct fib_alias *fa, *fa_node; - int kill_f; - - kill_f = 0; - list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) { - struct fib_info *fi = fa->fa_info; - - if (fi && (fi->fib_flags&RTNH_F_DEAD)) { - list_del_rcu(&fa->fa_list); - if (list_empty(&f->fn_alias)) { - hlist_del_rcu(&f->fn_hash); - kill_f = 1; - } - fib_hash_genid++; - - fn_free_alias(fa, f); - found++; - } - } - if (kill_f) { - fn_free_node(f); - fz->fz_nent--; - } - } - return found; -} - -/* caller must hold RTNL. */ -int fib_table_flush(struct fib_table *tb) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fn_zone *fz; - int found = 0; - - for (fz = rtnl_dereference(table->fn_zone_list); - fz != NULL; - fz = rtnl_dereference(fz->fz_next)) { - int i; - - for (i = fz->fz_divisor - 1; i >= 0; i--) - found += fn_flush_list(fz, i); - } - return found; -} - -void fib_free_table(struct fib_table *tb) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fn_zone *fz, *next; - - next = table->fn_zone_list; - while (next != NULL) { - fz = next; - next = fz->fz_next; - - if (fz->fz_hash != fz->fz_embedded_hash) - fz_hash_free(fz->fz_hash, fz->fz_divisor); - - kfree(fz); - } - - kfree(tb); -} - -static inline int -fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, - struct fib_table *tb, - struct fn_zone *fz, - struct hlist_head *head) -{ - struct hlist_node *node; - struct fib_node *f; - int i, s_i; - - s_i = cb->args[4]; - i = 0; - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { - if (i < s_i) - goto next; - - if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->tb_id, - fa->fa_type, - fa->fa_scope, - f->fn_key, - fz->fz_order, - fa->fa_tos, - fa->fa_info, - NLM_F_MULTI) < 0) { - cb->args[4] = i; - return -1; - } -next: - i++; - } - } - cb->args[4] = i; - return skb->len; -} - -static inline int -fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, - struct fib_table *tb, - struct fn_zone *fz) -{ - int h, s_h; - struct hlist_head *head = rcu_dereference(fz->fz_hash); - - if (head == NULL) - return skb->len; - s_h = cb->args[3]; - for (h = s_h; h < fz->fz_divisor; h++) { - if (hlist_empty(head + h)) - continue; - if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) { - cb->args[3] = h; - return -1; - } - memset(&cb->args[4], 0, - sizeof(cb->args) - 4*sizeof(cb->args[0])); - } - cb->args[3] = h; - return skb->len; -} - -int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, - struct netlink_callback *cb) -{ - int m = 0, s_m; - struct fn_zone *fz; - struct fn_hash *table = (struct fn_hash *)tb->tb_data; - - s_m = cb->args[2]; - rcu_read_lock(); - for (fz = rcu_dereference(table->fn_zone_list); - fz != NULL; - fz = rcu_dereference(fz->fz_next), m++) { - if (m < s_m) - continue; - if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { - cb->args[2] = m; - rcu_read_unlock(); - return -1; - } - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); - } - rcu_read_unlock(); - cb->args[2] = m; - return skb->len; -} - -void __init fib_hash_init(void) -{ - fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node), - 0, SLAB_PANIC, NULL); - - fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), - 0, SLAB_PANIC, NULL); - -} - -struct fib_table *fib_hash_table(u32 id) -{ - struct fib_table *tb; - - tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), - GFP_KERNEL); - if (tb == NULL) - return NULL; - - tb->tb_id = id; - tb->tb_default = -1; - - memset(tb->tb_data, 0, sizeof(struct fn_hash)); - return tb; -} - -/* ------------------------------------------------------------------------ */ -#ifdef CONFIG_PROC_FS - -struct fib_iter_state { - struct seq_net_private p; - struct fn_zone *zone; - int bucket; - struct hlist_head *hash_head; - struct fib_node *fn; - struct fib_alias *fa; - loff_t pos; - unsigned int genid; - int valid; -}; - -static struct fib_alias *fib_get_first(struct seq_file *seq) -{ - struct fib_iter_state *iter = seq->private; - struct fib_table *main_table; - struct fn_hash *table; - - main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); - table = (struct fn_hash *)main_table->tb_data; - - iter->bucket = 0; - iter->hash_head = NULL; - iter->fn = NULL; - iter->fa = NULL; - iter->pos = 0; - iter->genid = fib_hash_genid; - iter->valid = 1; - - for (iter->zone = rcu_dereference(table->fn_zone_list); - iter->zone != NULL; - iter->zone = rcu_dereference(iter->zone->fz_next)) { - int maxslot; - - if (!iter->zone->fz_nent) - continue; - - iter->hash_head = rcu_dereference(iter->zone->fz_hash); - maxslot = iter->zone->fz_divisor; - - for (iter->bucket = 0; iter->bucket < maxslot; - ++iter->bucket, ++iter->hash_head) { - struct hlist_node *node; - struct fib_node *fn; - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } - } -out: - return iter->fa; -} - -static struct fib_alias *fib_get_next(struct seq_file *seq) -{ - struct fib_iter_state *iter = seq->private; - struct fib_node *fn; - struct fib_alias *fa; - - /* Advance FA, if any. */ - fn = iter->fn; - fa = iter->fa; - if (fa) { - BUG_ON(!fn); - list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) { - iter->fa = fa; - goto out; - } - } - - fa = iter->fa = NULL; - - /* Advance FN. */ - if (fn) { - struct hlist_node *node = &fn->fn_hash; - hlist_for_each_entry_continue(fn, node, fn_hash) { - iter->fn = fn; - - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fa = fa; - goto out; - } - } - } - - fn = iter->fn = NULL; - - /* Advance hash chain. */ - if (!iter->zone) - goto out; - - for (;;) { - struct hlist_node *node; - int maxslot; - - maxslot = iter->zone->fz_divisor; - - while (++iter->bucket < maxslot) { - iter->hash_head++; - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } - - iter->zone = rcu_dereference(iter->zone->fz_next); - - if (!iter->zone) - goto out; - - iter->bucket = 0; - iter->hash_head = rcu_dereference(iter->zone->fz_hash); - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } -out: - iter->pos++; - return fa; -} - -static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos) -{ - struct fib_iter_state *iter = seq->private; - struct fib_alias *fa; - - if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) { - fa = iter->fa; - pos -= iter->pos; - } else - fa = fib_get_first(seq); - - if (fa) - while (pos && (fa = fib_get_next(seq))) - --pos; - return pos ? NULL : fa; -} - -static void *fib_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - void *v = NULL; - - rcu_read_lock(); - if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) - v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; - return v; -} - -static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq); -} - -static void fib_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) -{ - static const unsigned type2flags[RTN_MAX + 1] = { - [7] = RTF_REJECT, - [8] = RTF_REJECT, - }; - unsigned flags = type2flags[type]; - - if (fi && fi->fib_nh->nh_gw) - flags |= RTF_GATEWAY; - if (mask == htonl(0xFFFFFFFF)) - flags |= RTF_HOST; - flags |= RTF_UP; - return flags; -} - -/* - * This outputs /proc/net/route. - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. - */ -static int fib_seq_show(struct seq_file *seq, void *v) -{ - struct fib_iter_state *iter; - int len; - __be32 prefix, mask; - unsigned flags; - struct fib_node *f; - struct fib_alias *fa; - struct fib_info *fi; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " - "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" - "\tWindow\tIRTT"); - goto out; - } - - iter = seq->private; - f = iter->fn; - fa = iter->fa; - fi = fa->fa_info; - prefix = f->fn_key; - mask = FZ_MASK(iter->zone); - flags = fib_flag_trans(fa->fa_type, mask, fi); - if (fi) - seq_printf(seq, - "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", - fi->fib_dev ? fi->fib_dev->name : "*", prefix, - fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority, - mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0), - fi->fib_window, - fi->fib_rtt >> 3, &len); - else - seq_printf(seq, - "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", - prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len); - - seq_printf(seq, "%*s\n", 127 - len, ""); -out: - return 0; -} - -static const struct seq_operations fib_seq_ops = { - .start = fib_seq_start, - .next = fib_seq_next, - .stop = fib_seq_stop, - .show = fib_seq_show, -}; - -static int fib_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &fib_seq_ops, - sizeof(struct fib_iter_state)); -} - -static const struct file_operations fib_seq_fops = { - .owner = THIS_MODULE, - .open = fib_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -int __net_init fib_proc_init(struct net *net) -{ - if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops)) - return -ENOMEM; - return 0; -} - -void __net_exit fib_proc_exit(struct net *net) -{ - proc_net_remove(net, "route"); -} -#endif /* CONFIG_PROC_FS */ diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c079cc0ec651..d5c40d8f6632 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -25,7 +25,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa) } /* Exported by fib_semantics.c */ -extern int fib_semantic_match(struct list_head *head, +extern int fib_semantic_match(struct fib_table *tb, struct list_head *head, const struct flowi *flp, struct fib_result *res, int prefixlen, int fib_flags); extern void fib_release_info(struct fib_info *); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 9aff11d7278f..146bd82ef60d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -49,7 +49,7 @@ static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; -static unsigned int fib_hash_size; +static unsigned int fib_info_hash_size; static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 @@ -152,6 +152,8 @@ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + if (fi->fib_metrics != (u32 *) dst_default_metrics) + kfree(fi->fib_metrics); kfree(fi); } @@ -221,7 +223,7 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) static inline unsigned int fib_info_hashfn(const struct fib_info *fi) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = (fib_info_hash_size - 1); unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; @@ -613,14 +615,14 @@ out: static inline unsigned int fib_laddr_hashfn(__be32 val) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = (fib_info_hash_size - 1); return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } -static struct hlist_head *fib_hash_alloc(int bytes) +static struct hlist_head *fib_info_hash_alloc(int bytes) { if (bytes <= PAGE_SIZE) return kzalloc(bytes, GFP_KERNEL); @@ -630,7 +632,7 @@ static struct hlist_head *fib_hash_alloc(int bytes) get_order(bytes)); } -static void fib_hash_free(struct hlist_head *hash, int bytes) +static void fib_info_hash_free(struct hlist_head *hash, int bytes) { if (!hash) return; @@ -641,18 +643,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes) free_pages((unsigned long) hash, get_order(bytes)); } -static void fib_hash_move(struct hlist_head *new_info_hash, - struct hlist_head *new_laddrhash, - unsigned int new_size) +static void fib_info_hash_move(struct hlist_head *new_info_hash, + struct hlist_head *new_laddrhash, + unsigned int new_size) { struct hlist_head *old_info_hash, *old_laddrhash; - unsigned int old_size = fib_hash_size; + unsigned int old_size = fib_info_hash_size; unsigned int i, bytes; spin_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; - fib_hash_size = new_size; + fib_info_hash_size = new_size; for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; @@ -693,8 +695,8 @@ static void fib_hash_move(struct hlist_head *new_info_hash, spin_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); - fib_hash_free(old_info_hash, bytes); - fib_hash_free(old_laddrhash, bytes); + fib_info_hash_free(old_info_hash, bytes); + fib_info_hash_free(old_laddrhash, bytes); } struct fib_info *fib_create_info(struct fib_config *cfg) @@ -718,8 +720,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_hash_size) { - unsigned int new_size = fib_hash_size << 1; + if (fib_info_cnt >= fib_info_hash_size) { + unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; unsigned int bytes; @@ -727,21 +729,27 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (!new_size) new_size = 1; bytes = new_size * sizeof(struct hlist_head *); - new_info_hash = fib_hash_alloc(bytes); - new_laddrhash = fib_hash_alloc(bytes); + new_info_hash = fib_info_hash_alloc(bytes); + new_laddrhash = fib_info_hash_alloc(bytes); if (!new_info_hash || !new_laddrhash) { - fib_hash_free(new_info_hash, bytes); - fib_hash_free(new_laddrhash, bytes); + fib_info_hash_free(new_info_hash, bytes); + fib_info_hash_free(new_laddrhash, bytes); } else - fib_hash_move(new_info_hash, new_laddrhash, new_size); + fib_info_hash_move(new_info_hash, new_laddrhash, new_size); - if (!fib_hash_size) + if (!fib_info_hash_size) goto failure; } fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; + if (cfg->fc_mx) { + fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!fi->fib_metrics) + goto failure; + } else + fi->fib_metrics = (u32 *) dst_default_metrics; fib_info_cnt++; fi->fib_net = hold_net(net); @@ -881,8 +889,9 @@ failure: } /* Note! fib_semantic_match intentionally uses RCU list functions. */ -int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, int prefixlen, int fib_flags) +int fib_semantic_match(struct fib_table *tb, struct list_head *head, + const struct flowi *flp, struct fib_result *res, + int prefixlen, int fib_flags) { struct fib_alias *fa; int nh_sel = 0; @@ -946,6 +955,8 @@ out_fill_res: res->type = fa->fa_type; res->scope = fa->fa_scope; res->fi = fa->fa_info; + res->table = tb; + res->fa_head = head; if (!(fib_flags & FIB_LOOKUP_NOREF)) atomic_inc(&res->fi->fib_clntref); return 0; @@ -1125,6 +1136,62 @@ int fib_sync_down_dev(struct net_device *dev, int force) return ret; } +/* Must be invoked inside of an RCU protected region. */ +void fib_select_default(struct fib_result *res) +{ + struct fib_info *fi = NULL, *last_resort = NULL; + struct list_head *fa_head = res->fa_head; + struct fib_table *tb = res->table; + int order = -1, last_idx = -1; + struct fib_alias *fa; + + list_for_each_entry_rcu(fa, fa_head, fa_list) { + struct fib_info *next_fi = fa->fa_info; + + if (fa->fa_scope != res->scope || + fa->fa_type != RTN_UNICAST) + continue; + + if (next_fi->fib_priority > res->fi->fib_priority) + break; + if (!next_fi->fib_nh[0].nh_gw || + next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) + continue; + + fib_alias_accessed(fa); + + if (fi == NULL) { + if (next_fi != res->fi) + break; + } else if (!fib_detect_death(fi, order, &last_resort, + &last_idx, tb->tb_default)) { + fib_result_assign(res, fi); + tb->tb_default = order; + goto out; + } + fi = next_fi; + order++; + } + + if (order <= 0 || fi == NULL) { + tb->tb_default = -1; + goto out; + } + + if (!fib_detect_death(fi, order, &last_resort, &last_idx, + tb->tb_default)) { + fib_result_assign(res, fi); + tb->tb_default = order; + goto out; + } + + if (last_idx >= 0) + fib_result_assign(res, last_resort); + tb->tb_default = last_idx; +out: + rcu_read_unlock(); +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH /* diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0f280348e0fd..1eae90b054eb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -95,7 +95,7 @@ typedef unsigned int t_key; #define IS_TNODE(n) (!(n->parent & T_LEAF)) #define IS_LEAF(n) (n->parent & T_LEAF) -struct node { +struct rt_trie_node { unsigned long parent; t_key key; }; @@ -126,7 +126,7 @@ struct tnode { struct work_struct work; struct tnode *tnode_free; }; - struct node *child[0]; + struct rt_trie_node *child[0]; }; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -151,16 +151,16 @@ struct trie_stat { }; struct trie { - struct node *trie; + struct rt_trie_node *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats stats; #endif }; -static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); -static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, +static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); +static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull); -static struct node *resize(struct trie *t, struct tnode *tn); +static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ @@ -177,12 +177,12 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -static inline struct tnode *node_parent(struct node *node) +static inline struct tnode *node_parent(struct rt_trie_node *node) { return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); } -static inline struct tnode *node_parent_rcu(struct node *node) +static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) { struct tnode *ret = node_parent(node); @@ -192,22 +192,22 @@ static inline struct tnode *node_parent_rcu(struct node *node) /* Same as rcu_assign_pointer * but that macro() assumes that value is a pointer. */ -static inline void node_set_parent(struct node *node, struct tnode *ptr) +static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) { smp_wmb(); node->parent = (unsigned long)ptr | NODE_TYPE(node); } -static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) +static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) { BUG_ON(i >= 1U << tn->bits); return tn->child[i]; } -static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) +static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) { - struct node *ret = tnode_get_child(tn, i); + struct rt_trie_node *ret = tnode_get_child(tn, i); return rcu_dereference_rtnl(ret); } @@ -378,7 +378,7 @@ static void __tnode_free_rcu(struct rcu_head *head) { struct tnode *tn = container_of(head, struct tnode, rcu); size_t size = sizeof(struct tnode) + - (sizeof(struct node *) << tn->bits); + (sizeof(struct rt_trie_node *) << tn->bits); if (size <= PAGE_SIZE) kfree(tn); @@ -402,7 +402,7 @@ static void tnode_free_safe(struct tnode *tn) tn->tnode_free = tnode_free_head; tnode_free_head = tn; tnode_free_size += sizeof(struct tnode) + - (sizeof(struct node *) << tn->bits); + (sizeof(struct rt_trie_node *) << tn->bits); } static void tnode_free_flush(void) @@ -443,7 +443,7 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); + size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); struct tnode *tn = tnode_alloc(sz); if (tn) { @@ -456,7 +456,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), - sizeof(struct node) << bits); + sizeof(struct rt_trie_node) << bits); return tn; } @@ -465,7 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) * and no bits are skipped. See discussion in dyntree paper p. 6 */ -static inline int tnode_full(const struct tnode *tn, const struct node *n) +static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) { if (n == NULL || IS_LEAF(n)) return 0; @@ -474,7 +474,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n) } static inline void put_child(struct trie *t, struct tnode *tn, int i, - struct node *n) + struct rt_trie_node *n) { tnode_put_child_reorg(tn, i, n, -1); } @@ -484,10 +484,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, * Update the value of full_children and empty_children. */ -static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, +static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull) { - struct node *chi = tn->child[i]; + struct rt_trie_node *chi = tn->child[i]; int isfull; BUG_ON(i >= 1<<tn->bits); @@ -515,7 +515,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, } #define MAX_WORK 10 -static struct node *resize(struct trie *t, struct tnode *tn) +static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) { int i; struct tnode *old_tn; @@ -605,7 +605,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Keep root node larger */ - if (!node_parent((struct node *)tn)) { + if (!node_parent((struct rt_trie_node *)tn)) { inflate_threshold_use = inflate_threshold_root; halve_threshold_use = halve_threshold_root; } else { @@ -635,7 +635,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Return if at least one inflate is run */ if (max_work != MAX_WORK) - return (struct node *) tn; + return (struct rt_trie_node *) tn; /* * Halve as long as the number of empty children in this @@ -663,7 +663,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) if (tn->empty_children == tnode_child_length(tn) - 1) { one_child: for (i = 0; i < tnode_child_length(tn); i++) { - struct node *n; + struct rt_trie_node *n; n = tn->child[i]; if (!n) @@ -676,7 +676,7 @@ one_child: return n; } } - return (struct node *) tn; + return (struct rt_trie_node *) tn; } static struct tnode *inflate(struct trie *t, struct tnode *tn) @@ -723,14 +723,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) goto nomem; } - put_child(t, tn, 2*i, (struct node *) left); - put_child(t, tn, 2*i+1, (struct node *) right); + put_child(t, tn, 2*i, (struct rt_trie_node *) left); + put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); } } for (i = 0; i < olen; i++) { struct tnode *inode; - struct node *node = tnode_get_child(oldtnode, i); + struct rt_trie_node *node = tnode_get_child(oldtnode, i); struct tnode *left, *right; int size, j; @@ -825,7 +825,7 @@ nomem: static struct tnode *halve(struct trie *t, struct tnode *tn) { struct tnode *oldtnode = tn; - struct node *left, *right; + struct rt_trie_node *left, *right; int i; int olen = tnode_child_length(tn); @@ -856,7 +856,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) if (!newn) goto nomem; - put_child(t, tn, i/2, (struct node *)newn); + put_child(t, tn, i/2, (struct rt_trie_node *)newn); } } @@ -958,7 +958,7 @@ fib_find_node(struct trie *t, u32 key) { int pos; struct tnode *tn; - struct node *n; + struct rt_trie_node *n; pos = 0; n = rcu_dereference_rtnl(t->trie); @@ -993,17 +993,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) key = tn->key; - while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { + while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = (struct tnode *) resize(t, (struct tnode *)tn); tnode_put_child_reorg((struct tnode *)tp, cindex, - (struct node *)tn, wasfull); + (struct rt_trie_node *)tn, wasfull); - tp = node_parent((struct node *) tn); + tp = node_parent((struct rt_trie_node *) tn); if (!tp) - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); if (!tp) @@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); } @@ -1025,7 +1025,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) { int pos, newpos; struct tnode *tp = NULL, *tn = NULL; - struct node *n; + struct rt_trie_node *n; struct leaf *l; int missbit; struct list_head *fa_head = NULL; @@ -1111,10 +1111,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) if (t->trie && n == NULL) { /* Case 2: n is NULL, and will just insert a new leaf */ - node_set_parent((struct node *)l, tp); + node_set_parent((struct rt_trie_node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, (struct tnode *)tp, cindex, (struct node *)l); + put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); } else { /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ /* @@ -1141,18 +1141,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) return NULL; } - node_set_parent((struct node *)tn, tp); + node_set_parent((struct rt_trie_node *)tn, tp); missbit = tkey_extract_bits(key, newpos, 1); - put_child(t, tn, missbit, (struct node *)l); + put_child(t, tn, missbit, (struct rt_trie_node *)l); put_child(t, tn, 1-missbit, n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, - (struct node *)tn); + (struct rt_trie_node *)tn); } else { - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tp = tn; } } @@ -1340,7 +1340,7 @@ err: } /* should be called with rcu_read_lock */ -static int check_leaf(struct trie *t, struct leaf *l, +static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, t_key key, const struct flowi *flp, struct fib_result *res, int fib_flags) { @@ -1356,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l, if (l->key != (key & ntohl(mask))) continue; - err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); + err = fib_semantic_match(tb, &li->falh, flp, res, plen, fib_flags); #ifdef CONFIG_IP_FIB_TRIE_STATS if (err <= 0) @@ -1376,7 +1376,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, { struct trie *t = (struct trie *) tb->tb_data; int ret; - struct node *n; + struct rt_trie_node *n; struct tnode *pn; int pos, bits; t_key key = ntohl(flp->fl4_dst); @@ -1398,7 +1398,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); + ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); goto found; } @@ -1423,7 +1423,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); + ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); if (ret > 0) goto backtrace; goto found; @@ -1541,7 +1541,7 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - struct tnode *parent = node_parent_rcu((struct node *) pn); + struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); if (!parent) goto failed; @@ -1568,7 +1568,7 @@ found: */ static void trie_leaf_remove(struct trie *t, struct leaf *l) { - struct tnode *tp = node_parent((struct node *) l); + struct tnode *tp = node_parent((struct rt_trie_node *) l); pr_debug("entering trie_leaf_remove(%p)\n", l); @@ -1706,7 +1706,7 @@ static int trie_flush_leaf(struct leaf *l) * Scan for the next right leaf starting at node p->child[idx] * Since we have back pointer, no recursion necessary. */ -static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) +static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) { do { t_key idx; @@ -1732,7 +1732,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) } /* Node empty, walk back up to parent */ - c = (struct node *) p; + c = (struct rt_trie_node *) p; } while ((p = node_parent_rcu(c)) != NULL); return NULL; /* Root of trie */ @@ -1753,7 +1753,7 @@ static struct leaf *trie_firstleaf(struct trie *t) static struct leaf *trie_nextleaf(struct leaf *l) { - struct node *c = (struct node *) l; + struct rt_trie_node *c = (struct rt_trie_node *) l; struct tnode *p = node_parent_rcu(c); if (!p) @@ -1802,80 +1802,6 @@ void fib_free_table(struct fib_table *tb) kfree(tb); } -void fib_table_select_default(struct fib_table *tb, - const struct flowi *flp, - struct fib_result *res) -{ - struct trie *t = (struct trie *) tb->tb_data; - int order, last_idx; - struct fib_info *fi = NULL; - struct fib_info *last_resort; - struct fib_alias *fa = NULL; - struct list_head *fa_head; - struct leaf *l; - - last_idx = -1; - last_resort = NULL; - order = -1; - - rcu_read_lock(); - - l = fib_find_node(t, 0); - if (!l) - goto out; - - fa_head = get_fa_head(l, 0); - if (!fa_head) - goto out; - - if (list_empty(fa_head)) - goto out; - - list_for_each_entry_rcu(fa, fa_head, fa_list) { - struct fib_info *next_fi = fa->fa_info; - - if (fa->fa_scope != res->scope || - fa->fa_type != RTN_UNICAST) - continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; - if (!next_fi->fib_nh[0].nh_gw || - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) - continue; - - fib_alias_accessed(fa); - - if (fi == NULL) { - if (next_fi != res->fi) - break; - } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - fi = next_fi; - order++; - } - if (order <= 0 || fi == NULL) { - tb->tb_default = -1; - goto out; - } - - if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - if (last_idx >= 0) - fib_result_assign(res, last_resort); - tb->tb_default = last_idx; -out: - rcu_read_unlock(); -} - static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) @@ -1990,7 +1916,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, return skb->len; } -void __init fib_hash_init(void) +void __init fib_trie_init(void) { fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), @@ -2003,8 +1929,7 @@ void __init fib_hash_init(void) } -/* Fix more generic FIB names for init later */ -struct fib_table *fib_hash_table(u32 id) +struct fib_table *fib_trie_table(u32 id) { struct fib_table *tb; struct trie *t; @@ -2036,7 +1961,7 @@ struct fib_trie_iter { unsigned int depth; }; -static struct node *fib_trie_get_next(struct fib_trie_iter *iter) +static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) { struct tnode *tn = iter->tnode; unsigned int cindex = iter->index; @@ -2050,7 +1975,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) iter->tnode, iter->index, iter->depth); rescan: while (cindex < (1<<tn->bits)) { - struct node *n = tnode_get_child_rcu(tn, cindex); + struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); if (n) { if (IS_LEAF(n)) { @@ -2069,7 +1994,7 @@ rescan: } /* Current node exhausted, pop back up */ - p = node_parent_rcu((struct node *)tn); + p = node_parent_rcu((struct rt_trie_node *)tn); if (p) { cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; tn = p; @@ -2081,10 +2006,10 @@ rescan: return NULL; } -static struct node *fib_trie_get_first(struct fib_trie_iter *iter, +static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { - struct node *n; + struct rt_trie_node *n; if (!t) return NULL; @@ -2108,7 +2033,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct node *n; + struct rt_trie_node *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -2181,7 +2106,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct node *) * pointers; + bytes += sizeof(struct rt_trie_node *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } @@ -2262,7 +2187,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2275,7 +2200,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { - struct node *n; + struct rt_trie_node *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2304,7 +2229,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct node *n; + struct rt_trie_node *n; ++*pos; /* next node in same table */ @@ -2390,7 +2315,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct node *n = v; + struct rt_trie_node *n = v; if (!node_parent_rcu(n)) fib_table_print(seq, iter->tb); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4aa1b7f01ea0..ad2bcf1b69ae 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk) * Send an ICMP frame. */ -/* - * Check transmit rate limitation for given message. - * The rate information is held in the destination cache now. - * This function is generic and could be used for other purposes - * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. - * - * Note that the same dst_entry fields are modified by functions in - * route.c too, but these work for packet destinations while xrlim_allow - * works for icmp destinations. This means the rate limiting information - * for one "ip object" is shared - and these ICMPs are twice limited: - * by source and by destination. - * - * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate - * SHOULD allow setting of rate limits - * - * Shared between ICMPv4 and ICMPv6. - */ -#define XRLIM_BURST_FACTOR 6 -int xrlim_allow(struct dst_entry *dst, int timeout) -{ - unsigned long now, token = dst->rate_tokens; - int rc = 0; - - now = jiffies; - token += now - dst->rate_last; - dst->rate_last = now; - if (token > XRLIM_BURST_FACTOR * timeout) - token = XRLIM_BURST_FACTOR * timeout; - if (token >= timeout) { - token -= timeout; - rc = 1; - } - dst->rate_tokens = token; - return rc; -} -EXPORT_SYMBOL(xrlim_allow); - -static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, +static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, int type, int code) { struct dst_entry *dst = &rt->dst; - int rc = 1; + bool rc = true; if (type > NR_ICMP_TYPES) goto out; @@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, goto out; /* Limit if icmp type is enabled in ratemask. */ - if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) - rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); + if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { + if (!rt->peer) + rt_bind_peer(rt, 1); + rc = inet_peer_xrlim_allow(rt->peer, + net->ipv4.sysctl_icmp_ratelimit); + } out: return rc; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 2746c1fa6417..2ada17129fce 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) nlmsg_len(nlh) < hdrlen) return -EINVAL; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d9bc85751c74..709fbb4132d7 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -475,7 +475,7 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(AF_INET); + struct inet_peer_base *base = family_to_base(daddr->family); struct inet_peer *p; /* Look up for the address quickly, lockless. @@ -512,6 +512,9 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) atomic_set(&p->rid, 0); atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); p->tcp_ts_stamp = 0; + p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; + p->rate_tokens = 0; + p->rate_last = 0; INIT_LIST_HEAD(&p->unused); @@ -579,3 +582,44 @@ void inet_putpeer(struct inet_peer *p) local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_putpeer); + +/* + * Check transmit rate limitation for given message. + * The rate information is held in the inet_peer entries now. + * This function is generic and could be used for other purposes + * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. + * + * Note that the same inet_peer fields are modified by functions in + * route.c too, but these work for packet destinations while xrlim_allow + * works for icmp destinations. This means the rate limiting information + * for one "ip object" is shared - and these ICMPs are twice limited: + * by source and by destination. + * + * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate + * SHOULD allow setting of rate limits + * + * Shared between ICMPv4 and ICMPv6. + */ +#define XRLIM_BURST_FACTOR 6 +bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) +{ + unsigned long now, token; + bool rc = false; + + if (!peer) + return true; + + token = peer->rate_tokens; + now = jiffies; + token += now - peer->rate_last; + peer->rate_last = now; + if (token > XRLIM_BURST_FACTOR * timeout) + token = XRLIM_BURST_FACTOR * timeout; + if (token >= timeout) { + token -= timeout; + rc = true; + } + peer->rate_tokens = token; + return rc; +} +EXPORT_SYMBOL(inet_peer_xrlim_allow); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3f3a9afd73e0..8b65a12654e7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -60,6 +60,7 @@ #include <linux/notifier.h> #include <linux/if_arp.h> #include <linux/netfilter_ipv4.h> +#include <linux/compat.h> #include <net/ipip.h> #include <net/checksum.h> #include <net/netlink.h> @@ -1434,6 +1435,81 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) } } +#ifdef CONFIG_COMPAT +struct compat_sioc_sg_req { + struct in_addr src; + struct in_addr grp; + compat_ulong_t pktcnt; + compat_ulong_t bytecnt; + compat_ulong_t wrong_if; +}; + +struct compat_sioc_vif_req { + vifi_t vifi; /* Which iface */ + compat_ulong_t icount; + compat_ulong_t ocount; + compat_ulong_t ibytes; + compat_ulong_t obytes; +}; + +int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + struct compat_sioc_sg_req sr; + struct compat_sioc_vif_req vr; + struct vif_device *vif; + struct mfc_cache *c; + struct net *net = sock_net(sk); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; + + switch (cmd) { + case SIOCGETVIFCNT: + if (copy_from_user(&vr, arg, sizeof(vr))) + return -EFAULT; + if (vr.vifi >= mrt->maxvif) + return -EINVAL; + read_lock(&mrt_lock); + vif = &mrt->vif_table[vr.vifi]; + if (VIF_EXISTS(mrt, vr.vifi)) { + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &vr, sizeof(vr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + case SIOCGETSGCNT: + if (copy_from_user(&sr, arg, sizeof(sr))) + return -EFAULT; + + rcu_read_lock(); + c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + rcu_read_unlock(); + + if (copy_to_user(arg, &sr, sizeof(sr))) + return -EFAULT; + return 0; + } + rcu_read_unlock(); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} +#endif + static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index b8ddcc480ed9..a5e52a9f0a12 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -60,12 +60,12 @@ static int checkentry(const struct xt_tgchk_param *par) if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) - return false; + return -EINVAL; if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && mangle->target != XT_CONTINUE) - return false; - return true; + return -EINVAL; + return 0; } static struct xt_target arpt_mangle_reg __read_mostly = { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a3d5ab786e81..6390ba299b3d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -76,6 +76,7 @@ #include <linux/seq_file.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#include <linux/compat.h> static struct raw_hashinfo raw_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), @@ -838,6 +839,23 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +#ifdef CONFIG_COMPAT +static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + case SIOCINQ: + return -ENOIOCTLCMD; + default: +#ifdef CONFIG_IP_MROUTE + return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg)); +#else + return -ENOIOCTLCMD; +#endif + } +} +#endif + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -860,6 +878,7 @@ struct proto raw_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_raw_setsockopt, .compat_getsockopt = compat_raw_getsockopt, + .compat_ioctl = compat_raw_ioctl, #endif }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3e5b7cc2db4f..0455af851751 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -152,6 +152,41 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { } +static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + struct rtable *rt = (struct rtable *) dst; + struct inet_peer *peer; + u32 *p = NULL; + + if (!rt->peer) + rt_bind_peer(rt, 1); + + peer = rt->peer; + if (peer) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + p = peer->metrics; + if (inet_metrics_new(peer)) + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } else { + if (rt->fi) { + fib_info_put(rt->fi); + rt->fi = NULL; + } + } + } + return p; +} + static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), @@ -159,6 +194,7 @@ static struct dst_ops ipv4_dst_ops = { .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .default_mtu = ipv4_default_mtu, + .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, @@ -1441,6 +1477,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, if (rt->peer) atomic_inc(&rt->peer->refcnt); + if (rt->fi) + atomic_inc(&rt->fi->fib_clntref); if (arp_bind_neighbour(&rt->dst) || !(rt->dst.neighbour->nud_state & @@ -1525,6 +1563,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; + struct inet_peer *peer; int log_martians; rcu_read_lock(); @@ -1536,33 +1575,41 @@ void ip_rt_send_redirect(struct sk_buff *skb) log_martians = IN_DEV_LOG_MARTIANS(in_dev); rcu_read_unlock(); + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + if (!peer) { + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + return; + } + /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) - rt->dst.rate_tokens = 0; + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + peer->rate_tokens = 0; /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ - if (rt->dst.rate_tokens >= ip_rt_redirect_number) { - rt->dst.rate_last = jiffies; + if (peer->rate_tokens >= ip_rt_redirect_number) { + peer->rate_last = jiffies; return; } /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (rt->dst.rate_tokens == 0 || + if (peer->rate_tokens == 0 || time_after(jiffies, - (rt->dst.rate_last + - (ip_rt_redirect_load << rt->dst.rate_tokens)))) { + (peer->rate_last + + (ip_rt_redirect_load << peer->rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); - rt->dst.rate_last = jiffies; - ++rt->dst.rate_tokens; + peer->rate_last = jiffies; + ++peer->rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - rt->dst.rate_tokens == ip_rt_redirect_number && + peer->rate_tokens == ip_rt_redirect_number && net_ratelimit()) printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", &rt->rt_src, rt->rt_iif, @@ -1574,7 +1621,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) static int ip_error(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); + struct inet_peer *peer; unsigned long now; + bool send; int code; switch (rt->dst.error) { @@ -1594,15 +1643,24 @@ static int ip_error(struct sk_buff *skb) break; } - now = jiffies; - rt->dst.rate_tokens += now - rt->dst.rate_last; - if (rt->dst.rate_tokens > ip_rt_error_burst) - rt->dst.rate_tokens = ip_rt_error_burst; - rt->dst.rate_last = now; - if (rt->dst.rate_tokens >= ip_rt_error_cost) { - rt->dst.rate_tokens -= ip_rt_error_cost; - icmp_send(skb, ICMP_DEST_UNREACH, code, 0); + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + + send = true; + if (peer) { + now = jiffies; + peer->rate_tokens += now - peer->rate_last; + if (peer->rate_tokens > ip_rt_error_burst) + peer->rate_tokens = ip_rt_error_burst; + peer->rate_last = now; + if (peer->rate_tokens >= ip_rt_error_cost) + peer->rate_tokens -= ip_rt_error_cost; + else + send = false; } + if (send) + icmp_send(skb, ICMP_DEST_UNREACH, code, 0); out: kfree_skb(skb); return 0; @@ -1704,7 +1762,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } dst_metric_set(dst, RTAX_MTU, mtu); dst_set_expires(dst, ip_rt_mtu_expires); - call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -1720,6 +1777,10 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; struct inet_peer *peer = rt->peer; + if (rt->fi) { + fib_info_put(rt->fi); + rt->fi = NULL; + } if (peer) { rt->peer = NULL; inet_putpeer(peer); @@ -1815,6 +1876,33 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) return mtu; } +static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) +{ + struct inet_peer *peer; + int create = 0; + + /* If a peer entry exists for this destination, we must hook + * it up in order to get at cached metrics. + */ + if (rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS) + create = 1; + + rt_bind_peer(rt, create); + peer = rt->peer; + if (peer) { + if (inet_metrics_new(peer)) + memcpy(peer->metrics, fi->fib_metrics, + sizeof(u32) * RTAX_MAX); + dst_init_metrics(&rt->dst, peer->metrics, false); + } else { + if (fi->fib_metrics != (u32 *) dst_default_metrics) { + rt->fi = fi; + atomic_inc(&fi->fib_clntref); + } + dst_init_metrics(&rt->dst, fi->fib_metrics, true); + } +} + static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) { struct dst_entry *dst = &rt->dst; @@ -1824,7 +1912,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - dst_import_metrics(dst, fi->fib_metrics); + rt_init_metrics(rt, fi); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif @@ -2645,7 +2733,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, else #endif if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) - fib_select_default(net, &fl, &res); + fib_select_default(&res); if (!fl.fl4_src) fl.fl4_src = FIB_RES_PREFSRC(res); @@ -2707,6 +2795,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo return NULL; } +static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) +{ + return 0; +} + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -2716,6 +2809,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, + .default_mtu = ipv4_blackhole_default_mtu, .update_pmtu = ipv4_rt_blackhole_update_pmtu, }; @@ -2752,6 +2846,9 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi rt->peer = ort->peer; if (rt->peer) atomic_inc(&rt->peer->refcnt); + rt->fi = ort->fi; + if (rt->fi) + atomic_inc(&rt->fi->fib_clntref); dst_free(new); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6c11eece262c..f9867d2dbef4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2549b29b062d..2f692cefd3b0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); if (!cwnd) - cwnd = rfc3390_bytes_to_packets(tp->mss_cache); + cwnd = TCP_INIT_CWND; return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } @@ -4399,7 +4399,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) { tp->ucopy.len -= chunk; tp->copied_seq += chunk; - eaten = (chunk == skb->len && !th->fin); + eaten = (chunk == skb->len); tcp_rcv_space_adjust(sk); } local_bh_disable(); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 856f68466d49..02f583b3744a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1994,7 +1994,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } - st->offset = 0; if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8157b17959ee..d37baaa1dbe3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2199,7 +2199,7 @@ int udp4_ufo_send_check(struct sk_buff *skb) return 0; } -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b057d40addec..19fbdec6baaa 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -196,8 +196,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + dst_destroy_metrics_generic(dst); + if (likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); + xfrm_dst_destroy(xdst); } @@ -215,6 +218,7 @@ static struct dst_ops xfrm4_dst_ops = { .protocol = cpu_to_be16(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 24a1cf110d80..fd6782e3a038 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2661,14 +2661,12 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa; - LIST_HEAD(keep_list); - int state; + int state, i; ASSERT_RTNL(); - /* Flush routes if device is being removed or it is not loopback */ - if (how || !(dev->flags & IFF_LOOPBACK)) - rt6_ifdown(net, dev); + rt6_ifdown(net, dev); + neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); if (idev == NULL) @@ -2689,6 +2687,23 @@ static int addrconf_ifdown(struct net_device *dev, int how) } + /* Step 2: clear hash table */ + for (i = 0; i < IN6_ADDR_HSIZE; i++) { + struct hlist_head *h = &inet6_addr_lst[i]; + struct hlist_node *n; + + spin_lock_bh(&addrconf_hash_lock); + restart: + hlist_for_each_entry_rcu(ifa, n, h, addr_lst) { + if (ifa->idev == idev) { + hlist_del_init_rcu(&ifa->addr_lst); + addrconf_del_timer(ifa); + goto restart; + } + } + spin_unlock_bh(&addrconf_hash_lock); + } + write_lock_bh(&idev->lock); /* Step 2: clear flags for stateless addrconf */ @@ -2722,52 +2737,23 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct inet6_ifaddr, if_list); addrconf_del_timer(ifa); - /* If just doing link down, and address is permanent - and not link-local, then retain it. */ - if (!how && - (ifa->flags&IFA_F_PERMANENT) && - !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { - list_move_tail(&ifa->if_list, &keep_list); - - /* If not doing DAD on this address, just keep it. */ - if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || - idev->cnf.accept_dad <= 0 || - (ifa->flags & IFA_F_NODAD)) - continue; + list_del(&ifa->if_list); - /* If it was tentative already, no need to notify */ - if (ifa->flags & IFA_F_TENTATIVE) - continue; + write_unlock_bh(&idev->lock); - /* Flag it for later restoration when link comes up */ - ifa->flags |= IFA_F_TENTATIVE; - ifa->state = INET6_IFADDR_STATE_DAD; - } else { - list_del(&ifa->if_list); - - /* clear hash table */ - spin_lock_bh(&addrconf_hash_lock); - hlist_del_init_rcu(&ifa->addr_lst); - spin_unlock_bh(&addrconf_hash_lock); - - write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->state_lock); - state = ifa->state; - ifa->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifa->state_lock); - - if (state != INET6_IFADDR_STATE_DEAD) { - __ipv6_ifa_notify(RTM_DELADDR, ifa); - atomic_notifier_call_chain(&inet6addr_chain, - NETDEV_DOWN, ifa); - } + spin_lock_bh(&ifa->state_lock); + state = ifa->state; + ifa->state = INET6_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifa->state_lock); - in6_ifa_put(ifa); - write_lock_bh(&idev->lock); + if (state != INET6_IFADDR_STATE_DEAD) { + __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); } - } + in6_ifa_put(ifa); - list_splice(&keep_list, &idev->addr_list); + write_lock_bh(&idev->lock); + } write_unlock_bh(&idev->lock); @@ -4156,8 +4142,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->dst); - if (ifp->state == INET6_IFADDR_STATE_DEAD && - ip6_del_rt(ifp->rt)) + if (ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->dst); break; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 978e80e2c4a8..3194aa909872 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -772,7 +772,7 @@ out: return err; } -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 03e62f94ff8e..a31d91b04c87 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -157,20 +157,20 @@ static int is_ineligible(struct sk_buff *skb) /* * Check the ICMP output rate limit */ -static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi *fl) +static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, + struct flowi *fl) { struct dst_entry *dst; struct net *net = sock_net(sk); - int res = 0; + bool res = false; /* Informational messages are not limited. */ if (type & ICMPV6_INFOMSG_MASK) - return 1; + return true; /* Do not limit pmtu discovery, it would break it. */ if (type == ICMPV6_PKT_TOOBIG) - return 1; + return true; /* * Look up the output route. @@ -182,7 +182,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { - res = 1; + res = true; } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; @@ -191,7 +191,9 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - res = xrlim_allow(dst, tmo); + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); } dst_release(dst); return res; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5f8d242be3f3..2600e2288724 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -479,10 +479,13 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ - if (xrlim_allow(dst, 1*HZ)) + if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) ndisc_send_redirect(skb, n, target); } else { int addrtype = ipv6_addr_type(&hdr->saddr); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9fab274019c0..0e1d53bcf1e0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -34,6 +34,7 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/compat.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/sock.h> @@ -1804,6 +1805,80 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) } } +#ifdef CONFIG_COMPAT +struct compat_sioc_sg_req6 { + struct sockaddr_in6 src; + struct sockaddr_in6 grp; + compat_ulong_t pktcnt; + compat_ulong_t bytecnt; + compat_ulong_t wrong_if; +}; + +struct compat_sioc_mif_req6 { + mifi_t mifi; + compat_ulong_t icount; + compat_ulong_t ocount; + compat_ulong_t ibytes; + compat_ulong_t obytes; +}; + +int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + struct compat_sioc_sg_req6 sr; + struct compat_sioc_mif_req6 vr; + struct mif_device *vif; + struct mfc6_cache *c; + struct net *net = sock_net(sk); + struct mr6_table *mrt; + + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); + if (mrt == NULL) + return -ENOENT; + + switch (cmd) { + case SIOCGETMIFCNT_IN6: + if (copy_from_user(&vr, arg, sizeof(vr))) + return -EFAULT; + if (vr.mifi >= mrt->maxvif) + return -EINVAL; + read_lock(&mrt_lock); + vif = &mrt->vif6_table[vr.mifi]; + if (MIF_EXISTS(mrt, vr.mifi)) { + vr.icount = vif->pkt_in; + vr.ocount = vif->pkt_out; + vr.ibytes = vif->bytes_in; + vr.obytes = vif->bytes_out; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &vr, sizeof(vr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + case SIOCGETSGCNT_IN6: + if (copy_from_user(&sr, arg, sizeof(sr))) + return -EFAULT; + + read_lock(&mrt_lock); + c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + read_unlock(&mrt_lock); + + if (copy_to_user(arg, &sr, sizeof(sr))) + return -EFAULT; + return 0; + } + read_unlock(&mrt_lock); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} +#endif static inline int ip6mr_forward2_finish(struct sk_buff *skb) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2342545a5ee9..7254ce364006 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1553,7 +1553,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, "ICMPv6 Redirect: destination is not a neighbour.\n"); goto release; } - if (!xrlim_allow(dst, 1*HZ)) + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) goto release; if (dev->addr_len) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 86c39526ba5e..364e86683388 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -31,6 +31,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/skbuff.h> +#include <linux/compat.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -123,18 +124,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); +typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); -int rawv6_mh_filter_register(int (*filter)(struct sock *sock, - struct sk_buff *skb)) +static mh_filter_t __rcu *mh_filter __read_mostly; + +int rawv6_mh_filter_register(mh_filter_t filter) { rcu_assign_pointer(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); -int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, - struct sk_buff *skb)) +int rawv6_mh_filter_unregister(mh_filter_t filter) { rcu_assign_pointer(mh_filter, NULL); synchronize_rcu(); @@ -192,10 +193,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) * policy is placed in rawv6_rcv() because it is * required for each socket. */ - int (*filter)(struct sock *sock, struct sk_buff *skb); + mh_filter_t *filter; filter = rcu_dereference(mh_filter); - filtered = filter ? filter(sk, skb) : 0; + filtered = filter ? (*filter)(sk, skb) : 0; break; } #endif @@ -1157,6 +1158,23 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +#ifdef CONFIG_COMPAT +static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + case SIOCINQ: + return -ENOIOCTLCMD; + default: +#ifdef CONFIG_IPV6_MROUTE + return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg)); +#else + return -ENOIOCTLCMD; +#endif + } +} +#endif + static void rawv6_close(struct sock *sk, long timeout) { if (inet_sk(sk)->inet_num == IPPROTO_RAW) @@ -1215,6 +1233,7 @@ struct proto rawv6_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_rawv6_setsockopt, .compat_getsockopt = compat_rawv6_getsockopt, + .compat_ioctl = compat_rawv6_ioctl, #endif }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 373bd0416f69..12ec83d48806 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -72,8 +72,6 @@ #define RT6_TRACE(x...) do { ; } while (0) #endif -#define CLONE_OFFLINK_ROUTE 0 - static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); @@ -99,6 +97,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net, struct in6_addr *gwaddr, int ifindex); #endif +static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + struct rt6_info *rt = (struct rt6_info *) dst; + struct inet_peer *peer; + u32 *p = NULL; + + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + + peer = rt->rt6i_peer; + if (peer) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + p = peer->metrics; + if (inet_metrics_new(peer)) + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } + } + return p; +} + static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -107,6 +135,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .default_mtu = ip6_default_mtu, + .cow_metrics = ipv6_cow_metrics, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -115,6 +144,11 @@ static struct dst_ops ip6_dst_ops_template = { .local_out = __ip6_local_out, }; +static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) +{ + return 0; +} + static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -124,9 +158,14 @@ static struct dst_ops ip6_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, + .default_mtu = ip6_blackhole_default_mtu, .update_pmtu = ip6_rt_blackhole_update_pmtu, }; +static const u32 ip6_template_metrics[RTAX_MAX] = { + [RTAX_HOPLIMIT - 1] = 255, +}; + static struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -196,7 +235,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } if (peer) { - BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); rt->rt6i_peer = NULL; inet_putpeer(peer); } @@ -206,9 +244,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer *peer; - if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE))) - return; - peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); @@ -738,13 +773,8 @@ restart: if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); - else { -#if CLONE_OFFLINK_ROUTE + else nrt = rt6_alloc_clone(rt, &fl->fl6_dst); -#else - goto out2; -#endif - } dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; @@ -935,7 +965,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); - call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -2688,7 +2717,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_null_entry->dst, + ip6_template_metrics, true); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2699,7 +2729,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, + ip6_template_metrics, true); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2709,7 +2740,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, + ip6_template_metrics, true); #endif net->ipv6.sysctl.flush_delay = 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8ce38f10a547..b1599a345c10 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head) p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); do { - n = p->next; + n = rcu_dereference_protected(p->next, 1); kfree(p); p = n; } while (p); @@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head) static int ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) { - struct ip_tunnel_prl_entry *x, **p; + struct ip_tunnel_prl_entry *x; + struct ip_tunnel_prl_entry __rcu **p; int err = 0; ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { - for (p = &t->prl; *p; p = &(*p)->next) { - if ((*p)->addr == a->addr) { - x = *p; + for (p = &t->prl; + (x = rtnl_dereference(*p)) != NULL; + p = &x->next) { + if (x->addr == a->addr) { *p = x->next; call_rcu(&x->rcu_head, prl_entry_destroy_rcu); t->prl_count--; @@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) } err = -ENXIO; } else { - if (t->prl) { + x = rtnl_dereference(t->prl); + if (x) { t->prl_count = 0; - x = t->prl; call_rcu(&x->rcu_head, prl_list_destroy_rcu); t->prl = NULL; } @@ -1179,7 +1181,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; dev_hold(dev); - sitn->tunnels_wc[0] = tunnel; + rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; } @@ -1196,11 +1198,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t = sitn->tunnels[prio][h]; + struct ip_tunnel *t; + t = rtnl_dereference(sitn->tunnels[prio][h]); while (t != NULL) { unregister_netdevice_queue(t->dev, head); - t = t->next; + t = rtnl_dereference(t->next); } } } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index fa1d8f4e0051..7cb65ef79f9c 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -15,6 +15,8 @@ #include <net/addrconf.h> #include <net/inet_frag.h> +static struct ctl_table empty[1]; + static ctl_table ipv6_table_template[] = { { .procname = "route", @@ -35,6 +37,12 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "neigh", + .maxlen = 0, + .mode = 0555, + .child = empty, + }, { } }; @@ -152,7 +160,6 @@ static struct ctl_table_header *ip6_base; int ipv6_static_sysctl_register(void) { - static struct ctl_table empty[1]; ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); if (ip6_base == NULL) return -ENOMEM; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9a009c66c8a3..a419a787eb69 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1299,7 +1299,7 @@ static int udp6_ufo_send_check(struct sk_buff *skb) return 0; } -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7e74023ea6e4..834dc02f1d4f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -98,6 +98,10 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, if (!xdst->u.rt6.rt6i_idev) return -ENODEV; + xdst->u.rt6.rt6i_peer = rt->rt6i_peer; + if (rt->rt6i_peer) + atomic_inc(&rt->rt6i_peer->refcnt); + /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | @@ -216,6 +220,9 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); + dst_destroy_metrics_generic(dst); + if (likely(xdst->u.rt6.rt6i_peer)) + inet_putpeer(xdst->u.rt6.rt6i_peer); xfrm_dst_destroy(xdst); } @@ -251,6 +258,7 @@ static struct dst_ops xfrm6_dst_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 9109262abd24..c766056d0488 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -20,7 +20,7 @@ config MAC80211_HAS_RC def_bool n config MAC80211_RC_PID - bool "PID controller based rate control algorithm" if EMBEDDED + bool "PID controller based rate control algorithm" if EXPERT select MAC80211_HAS_RC ---help--- This option enables a TX rate control algorithm for @@ -28,14 +28,14 @@ config MAC80211_RC_PID rate. config MAC80211_RC_MINSTREL - bool "Minstrel" if EMBEDDED + bool "Minstrel" if EXPERT select MAC80211_HAS_RC default y ---help--- This option enables the 'minstrel' TX rate control algorithm config MAC80211_RC_MINSTREL_HT - bool "Minstrel 802.11n support" if EMBEDDED + bool "Minstrel 802.11n support" if EXPERT depends on MAC80211_RC_MINSTREL default y ---help--- diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 227ca82eef72..0c9d0c07eae6 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -76,7 +76,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, #endif /* CONFIG_MAC80211_HT_DEBUG */ if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, - &sta->sta, tid, NULL)) + &sta->sta, tid, NULL, 0)) printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); @@ -232,6 +232,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, if (buf_size == 0) buf_size = IEEE80211_MAX_AMPDU_BUF; + /* make sure the size doesn't exceed the maximum supported by the hw */ + if (buf_size > local->hw.max_rx_aggregation_subframes) + buf_size = local->hw.max_rx_aggregation_subframes; /* examine state machine */ mutex_lock(&sta->ampdu_mlme.mtx); @@ -287,7 +290,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, - &sta->sta, tid, &start_seq_num); + &sta->sta, tid, &start_seq_num, 0); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); #endif /* CONFIG_MAC80211_HT_DEBUG */ diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9cc472c6a6a5..63d852cb4ca2 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -190,7 +190,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP, - &sta->sta, tid, NULL); + &sta->sta, tid, NULL, 0); /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { @@ -311,7 +311,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) start_seq_num = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num); + &sta->sta, tid, &start_seq_num, 0); if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" @@ -342,7 +342,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, tid_tx->dialog_token, start_seq_num, - 0x40, tid_tx->timeout); + local->hw.max_tx_aggregation_subframes, + tid_tx->timeout); } int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, @@ -487,7 +488,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, - &sta->sta, tid, NULL); + &sta->sta, tid, NULL, + sta->ampdu_mlme.tid_tx[tid]->buf_size); /* * synchronize with TX path, while splicing the TX path @@ -742,9 +744,11 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, { struct tid_ampdu_tx *tid_tx; u16 capab, tid; + u8 buf_size; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; mutex_lock(&sta->ampdu_mlme.mtx); @@ -767,12 +771,23 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { + /* + * IEEE 802.11-2007 7.3.1.14: + * In an ADDBA Response frame, when the Status Code field + * is set to 0, the Buffer Size subfield is set to a value + * of at least 1. + */ + if (!buf_size) + goto out; + if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { /* ignore duplicate response */ goto out; } + tid_tx->buf_size = buf_size; + if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4bc8a9250cfd..2ba3af850dda 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1215,6 +1215,9 @@ static int ieee80211_set_channel(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = NULL; + struct ieee80211_channel *old_oper; + enum nl80211_channel_type old_oper_type; + enum nl80211_channel_type old_vif_oper_type= NL80211_CHAN_NO_HT; if (netdev) sdata = IEEE80211_DEV_TO_SUB_IF(netdev); @@ -1232,13 +1235,23 @@ static int ieee80211_set_channel(struct wiphy *wiphy, break; } - local->oper_channel = chan; + if (sdata) + old_vif_oper_type = sdata->vif.bss_conf.channel_type; + old_oper_type = local->_oper_channel_type; if (!ieee80211_set_channel_type(local, sdata, channel_type)) return -EBUSY; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) + old_oper = local->oper_channel; + local->oper_channel = chan; + + /* Update driver if changes were actually made. */ + if ((old_oper != local->oper_channel) || + (old_oper_type != local->_oper_channel_type)) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + + if ((sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) && + old_vif_oper_type != sdata->vif.bss_conf.channel_type) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT); return 0; @@ -1274,8 +1287,11 @@ static int ieee80211_scan(struct wiphy *wiphy, case NL80211_IFTYPE_P2P_GO: if (sdata->local->ops->hw_scan) break; - /* FIXME: implement NoA while scanning in software */ - return -EOPNOTSUPP; + /* + * FIXME: implement NoA while scanning in software, + * for now fall through to allow scanning only when + * beaconing hasn't been configured yet + */ case NL80211_IFTYPE_AP: if (sdata->u.ap.beacon) return -EOPNOTSUPP; @@ -1822,6 +1838,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, *cookie ^= 2; IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; local->hw_roc_skb = skb; + local->hw_roc_skb_for_status = skb; mutex_unlock(&local->mtx); return 0; @@ -1875,6 +1892,7 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, if (ret == 0) { kfree_skb(local->hw_roc_skb); local->hw_roc_skb = NULL; + local->hw_roc_skb_for_status = NULL; } mutex_unlock(&local->mtx); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 2dabdf7680d0..dacace6b1393 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -36,7 +36,7 @@ static ssize_t ieee80211_if_read( ret = (*format)(sdata, buf, sizeof(buf)); read_unlock(&dev_base_lock); - if (ret != -EINVAL) + if (ret >= 0) ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); return ret; @@ -81,6 +81,8 @@ static ssize_t ieee80211_if_fmt_##name( \ IEEE80211_IF_FMT(name, field, "%d\n") #define IEEE80211_IF_FMT_HEX(name, field) \ IEEE80211_IF_FMT(name, field, "%#x\n") +#define IEEE80211_IF_FMT_LHEX(name, field) \ + IEEE80211_IF_FMT(name, field, "%#lx\n") #define IEEE80211_IF_FMT_SIZE(name, field) \ IEEE80211_IF_FMT(name, field, "%zd\n") @@ -145,6 +147,9 @@ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], HEX); IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], HEX); +IEEE80211_IF_FILE(flags, flags, HEX); +IEEE80211_IF_FILE(state, state, LHEX); +IEEE80211_IF_FILE(channel_type, vif.bss_conf.channel_type, DEC); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); @@ -216,6 +221,104 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, __IEEE80211_IF_FILE_W(smps); +static ssize_t ieee80211_if_fmt_tkip_mic_test( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + return -EOPNOTSUPP; +} + +static int hwaddr_aton(const char *txt, u8 *addr) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++) { + int a, b; + + a = hex_to_bin(*txt++); + if (a < 0) + return -1; + b = hex_to_bin(*txt++); + if (b < 0) + return -1; + *addr++ = (a << 4) | b; + if (i < 5 && *txt++ != ':') + return -1; + } + + return 0; +} + +static ssize_t ieee80211_if_parse_tkip_mic_test( + struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + u8 addr[ETH_ALEN]; + struct sk_buff *skb; + struct ieee80211_hdr *hdr; + __le16 fc; + + /* + * Assume colon-delimited MAC address with possible white space + * following. + */ + if (buflen < 3 * ETH_ALEN - 1) + return -EINVAL; + if (hwaddr_aton(buf, addr) < 0) + return -EINVAL; + + if (!ieee80211_sdata_running(sdata)) + return -ENOTCONN; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 100); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->hw.extra_tx_headroom); + + hdr = (struct ieee80211_hdr *) skb_put(skb, 24); + memset(hdr, 0, 24); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); + /* DA BSSID SA */ + memcpy(hdr->addr1, addr, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(hdr->addr3, sdata->vif.addr, ETH_ALEN); + break; + case NL80211_IFTYPE_STATION: + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); + /* BSSID SA DA */ + if (sdata->vif.bss_conf.bssid == NULL) { + dev_kfree_skb(skb); + return -ENOTCONN; + } + memcpy(hdr->addr1, sdata->vif.bss_conf.bssid, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(hdr->addr3, addr, ETH_ALEN); + break; + default: + dev_kfree_skb(skb); + return -EOPNOTSUPP; + } + hdr->frame_control = fc; + + /* + * Add some length to the test frame to make it look bit more valid. + * The exact contents does not matter since the recipient is required + * to drop this because of the Michael MIC failure. + */ + memset(skb_put(skb, 50), 0, 50); + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_TKIP_MIC_FAILURE; + + ieee80211_tx_skb(sdata, skb); + + return buflen; +} + +__IEEE80211_IF_FILE_W(tkip_mic_test); + /* AP attributes */ IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); @@ -283,6 +386,9 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -291,22 +397,30 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(last_beacon); DEBUGFS_ADD(ave_beacon); DEBUGFS_ADD_MODE(smps, 0600); + DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); DEBUGFS_ADD(num_sta_ps); DEBUGFS_ADD(dtim_count); DEBUGFS_ADD(num_buffered_multicast); + DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } static void add_wds_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -316,12 +430,18 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) { + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); } #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 98d589960a49..78af32d4bc58 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -382,17 +382,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn) + u16 *ssn, u8 buf_size) { int ret = -EOPNOTSUPP; might_sleep(); - trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn); + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, - sta, tid, ssn); + sta, tid, ssn, buf_size); trace_drv_return_int(local, ret); diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 49c84218b2f4..e5cce19a7d65 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -9,6 +9,11 @@ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, ...) \ static inline void trace_ ## name(proto) {} +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(...) +#undef DEFINE_EVENT +#define DEFINE_EVENT(evt_class, name, proto, ...) \ +static inline void trace_ ## name(proto) {} #endif #undef TRACE_SYSTEM @@ -38,7 +43,7 @@ static inline void trace_ ## name(proto) {} * Tracing for driver callbacks. */ -TRACE_EVENT(drv_return_void, +DECLARE_EVENT_CLASS(local_only_evt, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local), TP_STRUCT__entry( @@ -50,6 +55,11 @@ TRACE_EVENT(drv_return_void, TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) ); +DEFINE_EVENT(local_only_evt, drv_return_void, + TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local) +); + TRACE_EVENT(drv_return_int, TP_PROTO(struct ieee80211_local *local, int ret), TP_ARGS(local, ret), @@ -78,40 +88,14 @@ TRACE_EVENT(drv_return_u64, TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret) ); -TRACE_EVENT(drv_start, +DEFINE_EVENT(local_only_evt, drv_start, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_stop, +DEFINE_EVENT(local_only_evt, drv_stop, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_add_interface, @@ -439,40 +423,14 @@ TRACE_EVENT(drv_hw_scan, ) ); -TRACE_EVENT(drv_sw_scan_start, +DEFINE_EVENT(local_only_evt, drv_sw_scan_start, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_sw_scan_complete, +DEFINE_EVENT(local_only_evt, drv_sw_scan_complete, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_get_stats, @@ -702,23 +660,9 @@ TRACE_EVENT(drv_conf_tx, ) ); -TRACE_EVENT(drv_get_tsf, +DEFINE_EVENT(local_only_evt, drv_get_tsf, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_set_tsf, @@ -742,41 +686,14 @@ TRACE_EVENT(drv_set_tsf, ) ); -TRACE_EVENT(drv_reset_tsf, +DEFINE_EVENT(local_only_evt, drv_reset_tsf, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_tx_last_beacon, +DEFINE_EVENT(local_only_evt, drv_tx_last_beacon, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_ampdu_action, @@ -784,9 +701,9 @@ TRACE_EVENT(drv_ampdu_action, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn), + u16 *ssn, u8 buf_size), - TP_ARGS(local, sdata, action, sta, tid, ssn), + TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size), TP_STRUCT__entry( LOCAL_ENTRY @@ -794,6 +711,7 @@ TRACE_EVENT(drv_ampdu_action, __field(u32, action) __field(u16, tid) __field(u16, ssn) + __field(u8, buf_size) VIF_ENTRY ), @@ -804,11 +722,13 @@ TRACE_EVENT(drv_ampdu_action, __entry->action = action; __entry->tid = tid; __entry->ssn = ssn ? *ssn : 0; + __entry->buf_size = buf_size; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, + __entry->tid, __entry->buf_size ) ); @@ -959,22 +879,9 @@ TRACE_EVENT(drv_remain_on_channel, ) ); -TRACE_EVENT(drv_cancel_remain_on_channel, +DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); /* @@ -1069,23 +976,9 @@ TRACE_EVENT(api_stop_tx_ba_cb, ) ); -TRACE_EVENT(api_restart_hw, +DEFINE_EVENT(local_only_evt, api_restart_hw, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(api_beacon_loss, @@ -1214,40 +1107,14 @@ TRACE_EVENT(api_chswitch_done, ) ); -TRACE_EVENT(api_ready_on_channel, +DEFINE_EVENT(local_only_evt, api_ready_on_channel, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(api_remain_on_channel_expired, +DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); /* diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 53c7077ffd4f..775fb63471c4 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -270,7 +270,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, enum ieee80211_band band = rx_status->band; if (elems->ds_params && elems->ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems->ds_params[0]); + freq = ieee80211_channel_to_frequency(elems->ds_params[0], + band); else freq = rx_status->freq; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c47d7c0e48a4..f2ef15d910a5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -225,6 +225,7 @@ struct ieee80211_if_ap { struct sk_buff_head ps_bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; + bool dtim_bc_mc; }; struct ieee80211_if_wds { @@ -654,8 +655,6 @@ struct tpt_led_trigger { * well be on the operating channel * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to * determine if we are on the operating channel or not - * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, - * gets only set in conjunction with SCAN_SW_SCANNING * @SCAN_COMPLETED: Set for our scan work function when the driver reported * that the scan completed. * @SCAN_ABORTED: Set for our scan work function when the driver reported @@ -664,7 +663,6 @@ struct tpt_led_trigger { enum { SCAN_SW_SCANNING, SCAN_HW_SCANNING, - SCAN_OFF_CHANNEL, SCAN_COMPLETED, SCAN_ABORTED, }; @@ -953,7 +951,7 @@ struct ieee80211_local { struct ieee80211_channel *hw_roc_channel; struct net_device *hw_roc_dev; - struct sk_buff *hw_roc_skb; + struct sk_buff *hw_roc_skb, *hw_roc_skb_for_status; struct work_struct hw_roc_start, hw_roc_done; enum nl80211_channel_type hw_roc_channel_type; unsigned int hw_roc_duration; @@ -1147,10 +1145,14 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); /* off-channel helpers */ -void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local); -void ieee80211_offchannel_stop_station(struct ieee80211_local *local); +bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local); +void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, + bool tell_ap); +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, + bool offchannel_ps_enable); void ieee80211_offchannel_return(struct ieee80211_local *local, - bool enable_beaconing); + bool enable_beaconing, + bool offchannel_ps_disable); void ieee80211_hw_roc_setup(struct ieee80211_local *local); /* interface handling */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8acba456744e..5a4e19b88032 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -382,6 +382,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i; + enum nl80211_channel_type orig_ct; if (local->scan_sdata == sdata) ieee80211_scan_cancel(local); @@ -542,8 +543,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, hw_reconf_flags = 0; } + /* Re-calculate channel-type, in case there are multiple vifs + * on different channel types. + */ + orig_ct = local->_oper_channel_type; + ieee80211_set_channel_type(local, NULL, NL80211_CHAN_NO_HT); + /* do after stop to avoid reconfiguring when we stop anyway */ - if (hw_reconf_flags) + if (hw_reconf_flags || (orig_ct != local->_oper_channel_type)) ieee80211_hw_config(local, hw_reconf_flags); spin_lock_irqsave(&local->queue_stop_reason_lock, flags); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a46ff06d7cb8..c155c0b69426 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -98,6 +98,41 @@ static void ieee80211_reconfig_filter(struct work_struct *work) ieee80211_configure_filter(local); } +/* + * Returns true if we are logically configured to be on + * the operating channel AND the hardware-conf is currently + * configured on the operating channel. Compares channel-type + * as well. + */ +bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local) +{ + struct ieee80211_channel *chan, *scan_chan; + enum nl80211_channel_type channel_type; + + /* This logic needs to match logic in ieee80211_hw_config */ + if (local->scan_channel) { + chan = local->scan_channel; + channel_type = NL80211_CHAN_NO_HT; + } else if (local->tmp_channel) { + chan = scan_chan = local->tmp_channel; + channel_type = local->tmp_channel_type; + } else { + chan = local->oper_channel; + channel_type = local->_oper_channel_type; + } + + if (chan != local->oper_channel || + channel_type != local->_oper_channel_type) + return false; + + /* Check current hardware-config against oper_channel. */ + if ((local->oper_channel != local->hw.conf.channel) || + (local->_oper_channel_type != local->hw.conf.channel_type)) + return false; + + return true; +} + int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan, *scan_chan; @@ -110,21 +145,27 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) scan_chan = local->scan_channel; + /* If this off-channel logic ever changes, ieee80211_on_oper_channel + * may need to change as well. + */ offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (scan_chan) { chan = scan_chan; channel_type = NL80211_CHAN_NO_HT; - local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; - } else if (local->tmp_channel && - local->oper_channel != local->tmp_channel) { + } else if (local->tmp_channel) { chan = scan_chan = local->tmp_channel; channel_type = local->tmp_channel_type; - local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; } else { chan = local->oper_channel; channel_type = local->_oper_channel_type; - local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; } + + if (chan != local->oper_channel || + channel_type != local->_oper_channel_type) + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; + else + local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; + offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (offchannel_flag || chan != local->hw.conf.channel || @@ -231,7 +272,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, if (changed & BSS_CHANGED_BEACON_ENABLED) { if (local->quiescing || !ieee80211_sdata_running(sdata) || - test_bit(SCAN_SW_SCANNING, &local->scanning)) { + test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) { sdata->vif.bss_conf.enable_beacon = false; } else { /* @@ -554,6 +595,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.queues = 1; local->hw.max_rates = 1; local->hw.max_report_rates = 0; + local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->user_power_level = -1; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index ca3af4685b0a..2a57cc02c618 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -574,7 +574,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, &elems); if (elems.ds_params && elems.ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + freq = ieee80211_channel_to_frequency(elems.ds_params[0], band); else freq = rx_status->freq; @@ -645,7 +645,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) mesh_mpath_table_grow(); - if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) + if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags)) mesh_mpp_table_grow(); if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 45fbb9e33746..f77adf1a520e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -28,8 +28,15 @@ #include "rate.h" #include "led.h" -#define IEEE80211_MAX_NULLFUNC_TRIES 2 -#define IEEE80211_MAX_PROBE_TRIES 5 +static int max_nullfunc_tries = 2; +module_param(max_nullfunc_tries, int, 0644); +MODULE_PARM_DESC(max_nullfunc_tries, + "Maximum nullfunc tx tries before disconnecting (reason 4)."); + +static int max_probe_tries = 5; +module_param(max_probe_tries, int, 0644); +MODULE_PARM_DESC(max_probe_tries, + "Maximum probe tries before disconnecting (reason 4)."); /* * Beacon loss timeout is calculated as N frames times the @@ -51,7 +58,11 @@ * a probe request because of beacon loss or for * checking the connection still works. */ -#define IEEE80211_PROBE_WAIT (HZ / 2) +static int probe_wait_ms = 500; +module_param(probe_wait_ms, int, 0644); +MODULE_PARM_DESC(probe_wait_ms, + "Maximum time(ms) to wait for probe response" + " before disconnecting (reason 4)."); /* * Weight given to the latest Beacon frame when calculating average signal @@ -161,6 +172,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; struct sta_info *sta; u32 changed = 0; + int hti_cfreq; u16 ht_opmode; bool enable_ht = true; enum nl80211_channel_type prev_chantype; @@ -174,10 +186,27 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, if (!sband->ht_cap.ht_supported) enable_ht = false; - /* check that channel matches the right operating channel */ - if (local->hw.conf.channel->center_freq != - ieee80211_channel_to_frequency(hti->control_chan)) - enable_ht = false; + if (enable_ht) { + hti_cfreq = ieee80211_channel_to_frequency(hti->control_chan, + sband->band); + /* check that channel matches the right operating channel */ + if (local->hw.conf.channel->center_freq != hti_cfreq) { + /* Some APs mess this up, evidently. + * Netgear WNDR3700 sometimes reports 4 higher than + * the actual channel, for instance. + */ + printk(KERN_DEBUG + "%s: Wrong control channel in association" + " response: configured center-freq: %d" + " hti-cfreq: %d hti->control_chan: %d" + " band: %d. Disabling HT.\n", + sdata->name, + local->hw.conf.channel->center_freq, + hti_cfreq, hti->control_chan, + sband->band); + enable_ht = false; + } + } if (enable_ht) { channel_type = NL80211_CHAN_HT20; @@ -429,7 +458,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, container_of((void *)bss, struct cfg80211_bss, priv); struct ieee80211_channel *new_ch; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); + int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num, + cbss->channel->band); ASSERT_MGD_MTX(ifmgd); @@ -600,6 +630,14 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; + if (sdata->vif.type == NL80211_IFTYPE_AP) { + /* If an AP vif is found, then disable PS + * by setting the count to zero thereby setting + * ps_sdata to NULL. + */ + count = 0; + break; + } if (sdata->vif.type != NL80211_IFTYPE_STATION) continue; found = sdata; @@ -1089,7 +1127,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *ssid; u8 *dst = ifmgd->associated->bssid; - u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3); + u8 unicast_limit = max(1, max_probe_tries - 3); /* * Try sending broadcast probe requests for the last three @@ -1115,7 +1153,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) } ifmgd->probe_send_count++; - ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; + ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); run_again(ifmgd, ifmgd->probe_timeout); } @@ -1216,7 +1254,8 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); + printk(KERN_DEBUG "%s: Connection to AP %pM lost.\n", + sdata->name, bssid); ieee80211_set_disassoc(sdata, true, true); mutex_unlock(&ifmgd->mtx); @@ -1519,7 +1558,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } if (elems->ds_params && elems->ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems->ds_params[0]); + freq = ieee80211_channel_to_frequency(elems->ds_params[0], + rx_status->band); else freq = rx_status->freq; @@ -1960,9 +2000,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - max_tries = IEEE80211_MAX_NULLFUNC_TRIES; + max_tries = max_nullfunc_tries; else - max_tries = IEEE80211_MAX_PROBE_TRIES; + max_tries = max_probe_tries; /* ACK received for nullfunc probing frame */ if (!ifmgd->probe_send_count) @@ -1972,9 +2012,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, "%s: No ack for nullfunc frame to" - " AP %pM, try %d\n", + " AP %pM, try %d/%i\n", sdata->name, bssid, - ifmgd->probe_send_count); + ifmgd->probe_send_count, max_tries); #endif ieee80211_mgd_probe_ap_send(sdata); } else { @@ -1994,17 +2034,17 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "%s: Failed to send nullfunc to AP %pM" " after %dms, disconnecting.\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); + bssid, probe_wait_ms); #endif ieee80211_sta_connection_lost(sdata, bssid); } else if (ifmgd->probe_send_count < max_tries) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, "%s: No probe response from AP %pM" - " after %dms, try %d\n", + " after %dms, try %d/%i\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ, - ifmgd->probe_send_count); + bssid, probe_wait_ms, + ifmgd->probe_send_count, max_tries); #endif ieee80211_mgd_probe_ap_send(sdata); } else { @@ -2016,7 +2056,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "%s: No probe response from AP %pM" " after %dms, disconnecting.\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); + bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid); } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index b4e52676f3fb..13427b194ced 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -17,10 +17,14 @@ #include "driver-trace.h" /* - * inform AP that we will go to sleep so that it will buffer the frames - * while we scan + * Tell our hardware to disable PS. + * Optionally inform AP that we will go to sleep so that it will buffer + * the frames while we are doing off-channel work. This is optional + * because we *may* be doing work on-operating channel, and want our + * hardware unconditionally awake, but still let the AP send us normal frames. */ -static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) +static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata, + bool tell_ap) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -41,8 +45,8 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } - if (!(local->offchannel_ps_enabled) || - !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) + if (tell_ap && (!local->offchannel_ps_enabled || + !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK))) /* * If power save was enabled, no need to send a nullfunc * frame because AP knows that we are sleeping. But if the @@ -77,6 +81,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) * we are sleeping, let's just enable power save mode in * hardware. */ + /* TODO: Only set hardware if CONF_PS changed? + * TODO: Should we set offchannel_ps_enabled to false? + */ local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } else if (local->hw.conf.dynamic_ps_timeout > 0) { @@ -95,63 +102,61 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) ieee80211_sta_reset_conn_monitor(sdata); } -void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, + bool offchannel_ps_enable) { struct ieee80211_sub_if_data *sdata; + /* + * notify the AP about us leaving the channel and stop all + * STA interfaces. + */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - /* disable beaconing */ + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) + set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); + + /* Check to see if we should disable beaconing. */ if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); - /* - * only handle non-STA interfaces here, STA interfaces - * are handled in ieee80211_offchannel_stop_station(), - * e.g., from the background scan state machine. - * - * In addition, do not stop monitor interface to allow it to be - * used from user space controlled off-channel operations. - */ - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_MONITOR) { - set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { netif_tx_stop_all_queues(sdata->dev); + if (offchannel_ps_enable && + (sdata->vif.type == NL80211_IFTYPE_STATION) && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata, true); } } mutex_unlock(&local->iflist_mtx); } -void ieee80211_offchannel_stop_station(struct ieee80211_local *local) +void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, + bool tell_ap) { struct ieee80211_sub_if_data *sdata; - /* - * notify the AP about us leaving the channel and stop all STA interfaces - */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); - netif_tx_stop_all_queues(sdata->dev); - if (sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata); - } + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata, tell_ap); } mutex_unlock(&local->iflist_mtx); } void ieee80211_offchannel_return(struct ieee80211_local *local, - bool enable_beaconing) + bool enable_beaconing, + bool offchannel_ps_disable) { struct ieee80211_sub_if_data *sdata; @@ -161,7 +166,8 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, continue; /* Tell AP we're back */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (offchannel_ps_disable && + sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) ieee80211_offchannel_ps_disable(sdata); } @@ -181,7 +187,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, netif_tx_wake_all_queues(sdata->dev); } - /* re-enable beaconing */ + /* Check to see if we should re-enable beaconing */ if (enable_beaconing && (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a6701ed87f0d..045b2fe4a414 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -85,6 +85,9 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, if (len & 1) /* padding for RX_FLAGS if necessary */ len++; + if (status->flag & RX_FLAG_HT) /* HT info */ + len += 3; + return len; } @@ -139,11 +142,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_RATE */ if (status->flag & RX_FLAG_HT) { /* - * TODO: add following information into radiotap header once - * suitable fields are defined for it: - * - MCS index (status->rate_idx) - * - HT40 (status->flag & RX_FLAG_40MHZ) - * - short-GI (status->flag & RX_FLAG_SHORT_GI) + * MCS information is a separate field in radiotap, + * added below. */ *pos = 0; } else { @@ -193,6 +193,20 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP; put_unaligned_le16(rx_flags, pos); pos += 2; + + if (status->flag & RX_FLAG_HT) { + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + *pos++ = IEEE80211_RADIOTAP_MCS_HAVE_MCS | + IEEE80211_RADIOTAP_MCS_HAVE_GI | + IEEE80211_RADIOTAP_MCS_HAVE_BW; + *pos = 0; + if (status->flag & RX_FLAG_SHORT_GI) + *pos |= IEEE80211_RADIOTAP_MCS_SGI; + if (status->flag & RX_FLAG_40MHZ) + *pos |= IEEE80211_RADIOTAP_MCS_BW_40; + pos++; + *pos++ = status->rate_idx; + } } /* @@ -392,16 +406,10 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN))) return RX_CONTINUE; - if (test_bit(SCAN_HW_SCANNING, &local->scanning)) + if (test_bit(SCAN_HW_SCANNING, &local->scanning) || + test_bit(SCAN_SW_SCANNING, &local->scanning)) return ieee80211_scan_rx(rx->sdata, skb); - if (test_bit(SCAN_SW_SCANNING, &local->scanning)) { - /* drop all the other packets during a software scan anyway */ - if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED) - dev_kfree_skb(skb); - return RX_QUEUED; - } - /* scanning finished during invoking of handlers */ I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); return RX_DROP_UNUSABLE; @@ -798,7 +806,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) rx->local->dot11FrameDuplicateCount++; rx->sta->num_duplicates++; } - return RX_DROP_MONITOR; + return RX_DROP_UNUSABLE; } else rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl; } @@ -1088,7 +1096,8 @@ static void ap_sta_ps_start(struct sta_info *sta) atomic_inc(&sdata->bss->num_sta_ps); set_sta_flags(sta, WLAN_STA_PS_STA); - drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); + if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) + drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", sdata->name, sta->sta.addr, sta->sta.aid); @@ -1117,6 +1126,27 @@ static void ap_sta_ps_end(struct sta_info *sta) ieee80211_sta_ps_deliver_wakeup(sta); } +int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start) +{ + struct sta_info *sta_inf = container_of(sta, struct sta_info, sta); + bool in_ps; + + WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS)); + + /* Don't let the same PS state be set twice */ + in_ps = test_sta_flags(sta_inf, WLAN_STA_PS_STA); + if ((start && in_ps) || (!start && !in_ps)) + return -EINVAL; + + if (start) + ap_sta_ps_start(sta_inf); + else + ap_sta_ps_end(sta_inf); + + return 0; +} +EXPORT_SYMBOL(ieee80211_sta_ps_transition); + static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { @@ -1161,7 +1191,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Change STA power saving mode only at the end of a frame * exchange sequence. */ - if (!ieee80211_has_morefrags(hdr->frame_control) && + if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) && + !ieee80211_has_morefrags(hdr->frame_control) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { @@ -1556,17 +1587,36 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + bool check_port_control = false; + struct ethhdr *ehdr; + int ret; if (ieee80211_has_a4(hdr->frame_control) && sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) return -1; + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) { + + if (!sdata->u.mgd.use_4addr) + return -1; + else + check_port_control = true; + } + if (is_multicast_ether_addr(hdr->addr1) && - ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) || - (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr))) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) + return -1; + + ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); + if (ret < 0 || !check_port_control) + return ret; + + ehdr = (struct ethhdr *) rx->skb->data; + if (ehdr->h_proto != rx->sdata->control_port_protocol) return -1; - return ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); + return 0; } /* @@ -1893,7 +1943,10 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) dev->stats.rx_bytes += rx->skb->len; if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && - !is_multicast_ether_addr(((struct ethhdr *)rx->skb->data)->h_dest)) { + !is_multicast_ether_addr( + ((struct ethhdr *)rx->skb->data)->h_dest) && + (!local->scanning && + !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) { mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } @@ -2590,7 +2643,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 0; if (!multicast && compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { - if (!(sdata->dev->flags & IFF_PROMISC)) + if (!(sdata->dev->flags & IFF_PROMISC) || + sdata->u.mgd.use_4addr) return 0; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } @@ -2639,7 +2693,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 0; } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { - if (!(status->rx_flags & IEEE80211_RX_IN_SCAN)) + if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) && + !ieee80211_is_beacon(hdr->frame_control)) return 0; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } @@ -2692,7 +2747,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, if (!skb) { if (net_ratelimit()) wiphy_debug(local->hw.wiphy, - "failed to copy multicast frame for %s\n", + "failed to copy skb for %s\n", sdata->name); return true; } @@ -2730,7 +2785,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, local->dot11ReceivedFragmentCount++; if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || - test_bit(SCAN_OFF_CHANNEL, &local->scanning))) + test_bit(SCAN_SW_SCANNING, &local->scanning))) status->rx_flags |= IEEE80211_RX_IN_SCAN; if (ieee80211_is_mgmt(fc)) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index fb274db77e3c..0ea6adae3e06 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -196,7 +196,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) ieee802_11_parse_elems(elements, skb->len - baselen, &elems); if (elems.ds_params && elems.ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + freq = ieee80211_channel_to_frequency(elems.ds_params[0], + rx_status->band); else freq = rx_status->freq; @@ -211,6 +212,14 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) if (bss) ieee80211_rx_bss_put(sdata->local, bss); + /* If we are on-operating-channel, and this packet is for the + * current channel, pass the pkt on up the stack so that + * the rest of the stack can make use of it. + */ + if (ieee80211_cfg_on_oper_channel(sdata->local) + && (channel == sdata->local->oper_channel)) + return RX_CONTINUE; + dev_kfree_skb(skb); return RX_QUEUED; } @@ -292,15 +301,31 @@ static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, bool was_hw_scan) { struct ieee80211_local *local = hw_to_local(hw); + bool on_oper_chan; + bool enable_beacons = false; + + mutex_lock(&local->mtx); + on_oper_chan = ieee80211_cfg_on_oper_channel(local); + + if (was_hw_scan || !on_oper_chan) { + if (WARN_ON(local->scan_channel)) + local->scan_channel = NULL; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + } - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); if (!was_hw_scan) { + bool on_oper_chan2; ieee80211_configure_filter(local); drv_sw_scan_complete(local); - ieee80211_offchannel_return(local, true); + on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); + /* We should always be on-channel at this point. */ + WARN_ON(!on_oper_chan2); + if (on_oper_chan2 && (on_oper_chan != on_oper_chan2)) + enable_beacons = true; + + ieee80211_offchannel_return(local, enable_beacons, true); } - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); @@ -340,13 +365,15 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) */ drv_sw_scan_start(local); - ieee80211_offchannel_stop_beaconing(local); - local->leave_oper_channel_time = 0; local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; - drv_flush(local, false); + /* We always want to use off-channel PS, even if we + * are not really leaving oper-channel. Don't + * tell the AP though, as long as we are on-channel. + */ + ieee80211_offchannel_enable_all_ps(local, false); ieee80211_configure_filter(local); @@ -486,7 +513,21 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, } mutex_unlock(&local->iflist_mtx); - if (local->scan_channel) { + next_chan = local->scan_req->channels[local->scan_channel_idx]; + + if (ieee80211_cfg_on_oper_channel(local)) { + /* We're currently on operating channel. */ + if ((next_chan == local->oper_channel) && + (local->_oper_channel_type == NL80211_CHAN_NO_HT)) + /* We don't need to move off of operating channel. */ + local->next_scan_state = SCAN_SET_CHANNEL; + else + /* + * We do need to leave operating channel, as next + * scan is somewhere else. + */ + local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; + } else { /* * we're currently scanning a different channel, let's * see if we can scan another channel without interfering @@ -502,7 +543,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, * * Otherwise switch back to the operating channel. */ - next_chan = local->scan_req->channels[local->scan_channel_idx]; bad_latency = time_after(jiffies + ieee80211_scan_get_channel_time(next_chan), @@ -520,12 +560,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; else local->next_scan_state = SCAN_SET_CHANNEL; - } else { - /* - * we're on the operating channel currently, let's - * leave that channel now to scan another one - */ - local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; } *next_delay = 0; @@ -534,9 +568,10 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, unsigned long *next_delay) { - ieee80211_offchannel_stop_station(local); - - __set_bit(SCAN_OFF_CHANNEL, &local->scanning); + /* PS will already be in off-channel mode, + * we do that once at the beginning of scanning. + */ + ieee80211_offchannel_stop_vifs(local, false); /* * What if the nullfunc frames didn't arrive? @@ -559,15 +594,15 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca { /* switch back to the operating channel */ local->scan_channel = NULL; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + if (!ieee80211_cfg_on_oper_channel(local)) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); /* - * Only re-enable station mode interface now; beaconing will be - * re-enabled once the full scan has been completed. + * Re-enable vifs and beaconing. Leave PS + * in off-channel state..will put that back + * on-channel at the end of scanning. */ - ieee80211_offchannel_return(local, false); - - __clear_bit(SCAN_OFF_CHANNEL, &local->scanning); + ieee80211_offchannel_return(local, true, false); *next_delay = HZ / 5; local->next_scan_state = SCAN_DECISION; @@ -583,8 +618,12 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, chan = local->scan_req->channels[local->scan_channel_idx]; local->scan_channel = chan; - if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) - skip = 1; + + /* Only call hw-config if we really need to change channels. */ + if ((chan != local->hw.conf.channel) || + (local->hw.conf.channel_type != NL80211_CHAN_NO_HT)) + if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) + skip = 1; /* advance state machine to next channel/band */ local->scan_channel_idx++; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c426504ed1cf..5a11078827ab 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -899,7 +899,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) struct ieee80211_local *local = sdata->local; int sent, buffered; - drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); + if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) + drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index bbdd2a86a94b..ca0b69060ef7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -82,6 +82,7 @@ enum ieee80211_sta_info_flags { * @state: session state (see above) * @stop_initiator: initiator of a session stop * @tx_stop: TX DelBA frame when stopping + * @buf_size: reorder buffer size at receiver * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -101,6 +102,7 @@ struct tid_ampdu_tx { u8 dialog_token; u8 stop_initiator; bool tx_stop; + u8 buf_size; }; /** diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 38a797217a91..010a559bd872 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -98,6 +98,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * (b) always process RX events before TX status events if ordering * can be unknown, for example with different interrupt status * bits. + * (c) if PS mode transitions are manual (i.e. the flag + * %IEEE80211_HW_AP_LINK_PS is set), always process PS state + * changes before calling TX status events if ordering can be + * unknown. */ if (test_sta_flags(sta, WLAN_STA_PS_STA) && skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { @@ -323,6 +327,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { struct ieee80211_work *wk; + u64 cookie = (unsigned long)skb; rcu_read_lock(); list_for_each_entry_rcu(wk, &local->work_list, list) { @@ -334,8 +339,12 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) break; } rcu_read_unlock(); + if (local->hw_roc_skb_for_status == skb) { + cookie = local->hw_roc_cookie ^ 2; + local->hw_roc_skb_for_status = NULL; + } cfg80211_mgmt_tx_status( - skb->dev, (unsigned long) skb, skb->data, skb->len, + skb->dev, cookie, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5950e3abead9..17ef4f4e8602 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -257,7 +257,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) return TX_CONTINUE; - if (unlikely(test_bit(SCAN_OFF_CHANNEL, &tx->local->scanning)) && + if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) && + test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) && !ieee80211_is_probe_req(hdr->frame_control) && !ieee80211_is_nullfunc(hdr->frame_control)) /* @@ -1394,7 +1395,8 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) /* handlers after fragment must be aware of tx info fragmentation! */ CALL_TXH(ieee80211_tx_h_stats); CALL_TXH(ieee80211_tx_h_encrypt); - CALL_TXH(ieee80211_tx_h_calculate_duration); + if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_calculate_duration); #undef CALL_TXH txh_done: @@ -1547,7 +1549,7 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, skb_orphan(skb); } - if (skb_header_cloned(skb)) + if (skb_cloned(skb)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@ -1750,7 +1752,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, __le16 fc; struct ieee80211_hdr hdr; struct ieee80211s_hdr mesh_hdr __maybe_unused; - struct mesh_path *mppath = NULL; + struct mesh_path __maybe_unused *mppath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; int nh_pos, h_pos; @@ -1815,19 +1817,19 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, mppath = mpp_path_lookup(skb->data, sdata); /* - * Do not use address extension, if it is a packet from - * the same interface and the destination is not being - * proxied by any other mest point. + * Use address extension if it is a packet from + * another interface or if we know the destination + * is being proxied by a portal (i.e. portal address + * differs from proxied address) */ if (compare_ether_addr(sdata->vif.addr, skb->data + ETH_ALEN) == 0 && - (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) { + !(mppath && compare_ether_addr(mppath->mpp, skb->data))) { hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, skb->data, skb->data + ETH_ALEN); meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, NULL, NULL); } else { - /* packet from other interface */ int is_mesh_mcast = 1; const u8 *mesh_da; @@ -2178,6 +2180,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss, if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) aid0 = 1; + bss->dtim_bc_mc = aid0 == 1; + if (have_bits) { /* Find largest even number N1 so that bits numbered 1 through * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits @@ -2230,6 +2234,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); + if (!ieee80211_sdata_running(sdata)) + goto out; + if (tim_offset) *tim_offset = 0; if (tim_length) @@ -2238,7 +2245,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); - if (ap && beacon) { + if (beacon) { /* * headroom, head length, * tail length and maximum TIM length @@ -2299,6 +2306,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_mgmt *mgmt; u8 *pos; +#ifdef CONFIG_MAC80211_MESH + if (!sdata->u.mesh.mesh_id_len) + goto out; +#endif + /* headroom, head length, tail length and maximum TIM length */ skb = dev_alloc_skb(local->tx_headroom + 400 + sdata->u.mesh.vendor_ie_len); @@ -2540,7 +2552,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head) goto out; - if (bss->dtim_count != 0) + if (bss->dtim_count != 0 || !bss->dtim_bc_mc) goto out; /* send buffered bc/mc only after DTIM beacon */ while (1) { diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 36305e0d06ef..6bf787a5b38a 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -924,18 +924,44 @@ static void ieee80211_work_work(struct work_struct *work) } if (!started && !local->tmp_channel) { - /* - * TODO: could optimize this by leaving the - * station vifs in awake mode if they - * happen to be on the same channel as - * the requested channel - */ - ieee80211_offchannel_stop_beaconing(local); - ieee80211_offchannel_stop_station(local); + bool on_oper_chan; + bool tmp_chan_changed = false; + bool on_oper_chan2; + on_oper_chan = ieee80211_cfg_on_oper_channel(local); + if (local->tmp_channel) + if ((local->tmp_channel != wk->chan) || + (local->tmp_channel_type != wk->chan_type)) + tmp_chan_changed = true; local->tmp_channel = wk->chan; local->tmp_channel_type = wk->chan_type; - ieee80211_hw_config(local, 0); + /* + * Leave the station vifs in awake mode if they + * happen to be on the same channel as + * the requested channel. + */ + on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); + if (on_oper_chan != on_oper_chan2) { + if (on_oper_chan2) { + /* going off oper channel, PS too */ + ieee80211_offchannel_stop_vifs(local, + true); + ieee80211_hw_config(local, 0); + } else { + /* going on channel, but leave PS + * off-channel. */ + ieee80211_hw_config(local, 0); + ieee80211_offchannel_return(local, + true, + false); + } + } else if (tmp_chan_changed) + /* Still off-channel, but on some other + * channel, so update hardware. + * PS should already be off-channel. + */ + ieee80211_hw_config(local, 0); + started = true; wk->timeout = jiffies; } @@ -1011,9 +1037,27 @@ static void ieee80211_work_work(struct work_struct *work) } if (!remain_off_channel && local->tmp_channel) { + bool on_oper_chan = ieee80211_cfg_on_oper_channel(local); local->tmp_channel = NULL; - ieee80211_hw_config(local, 0); - ieee80211_offchannel_return(local, true); + /* If tmp_channel wasn't operating channel, then + * we need to go back on-channel. + * NOTE: If we can ever be here while scannning, + * or if the hw_config() channel config logic changes, + * then we may need to do a more thorough check to see if + * we still need to do a hardware config. Currently, + * we cannot be here while scanning, however. + */ + if (ieee80211_cfg_on_oper_channel(local) && !on_oper_chan) + ieee80211_hw_config(local, 0); + + /* At the least, we need to disable offchannel_ps, + * so just go ahead and run the entire offchannel + * return logic here. We *could* skip enabling + * beaconing if we were already on-oper-channel + * as a future optimization. + */ + ieee80211_offchannel_return(local, true, true); + /* give connection some time to breathe */ run_again(local, jiffies + HZ/2); } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index bee230d8fd11..f1765de2f4bf 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -26,13 +26,12 @@ ieee80211_tx_result ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) { - u8 *data, *key, *mic, key_offset; + u8 *data, *key, *mic; size_t data_len; unsigned int hdrlen; struct ieee80211_hdr *hdr; struct sk_buff *skb = tx->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int authenticator; int tail; hdr = (struct ieee80211_hdr *)skb->data; @@ -47,6 +46,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) data = skb->data + hdrlen; data_len = skb->len - hdrlen; + if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) { + /* Need to use software crypto for the test */ + info->control.hw_key = NULL; + } + if (info->control.hw_key && !(tx->flags & IEEE80211_TX_FRAGMENTED) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { @@ -62,17 +66,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) skb_headroom(skb) < TKIP_IV_LEN)) return TX_DROP; -#if 0 - authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */ -#else - authenticator = 1; -#endif - key_offset = authenticator ? - NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY : - NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY; - key = &tx->key->conf.key[key_offset]; + key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; mic = skb_put(skb, MICHAEL_MIC_LEN); michael_mic(key, hdr, data, data_len, mic); + if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) + mic[0]++; return TX_CONTINUE; } @@ -81,14 +79,13 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) ieee80211_rx_result ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) { - u8 *data, *key = NULL, key_offset; + u8 *data, *key = NULL; size_t data_len; unsigned int hdrlen; u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - int authenticator = 1, wpa_test = 0; /* No way to verify the MIC if the hardware stripped it */ if (status->flag & RX_FLAG_MMIC_STRIPPED) @@ -106,17 +103,9 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) data = skb->data + hdrlen; data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; -#if 0 - authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */ -#else - authenticator = 1; -#endif - key_offset = authenticator ? - NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY : - NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY; - key = &rx->key->conf.key[key_offset]; + key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY]; michael_mic(key, hdr, data, data_len, mic); - if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { + if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0) { if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; @@ -208,7 +197,7 @@ ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - int hdrlen, res, hwaccel = 0, wpa_test = 0; + int hdrlen, res, hwaccel = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -235,7 +224,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) hdr->addr1, hwaccel, rx->queue, &rx->tkip_iv32, &rx->tkip_iv16); - if (res != TKIP_DECRYPT_OK || wpa_test) + if (res != TKIP_DECRYPT_OK) return RX_DROP_UNUSABLE; /* Trim ICV */ diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 5702de35e2bb..63a1b915a7e4 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -63,6 +63,9 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) * this does not harm and it happens very rarely. */ unsigned long missed = e->missed; + if (!((events | missed) & e->ctmask)) + goto out_unlock; + ret = notify->fcn(events | missed, &item); if (unlikely(ret < 0 || missed)) { spin_lock_bh(&ct->lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index cb1a8190ba29..30bf8a167fc8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -710,6 +710,7 @@ restart: if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, ct) < 0) { + nf_conntrack_get(&ct->ct_general); cb->args[1] = (unsigned long)ct; goto out; } @@ -967,7 +968,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) + if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); @@ -1832,7 +1833,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, ctnetlink_exp_done); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index d3eb5ed1892f..b46626cddd93 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -53,15 +53,13 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par) } static inline int -iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b) +iprange_ipv6_lt(const struct in6_addr *a, const struct in6_addr *b) { unsigned int i; - int r; for (i = 0; i < 4; ++i) { - r = ntohl(a->s6_addr32[i]) - ntohl(b->s6_addr32[i]); - if (r != 0) - return r; + if (a->s6_addr32[i] != b->s6_addr32[i]) + return ntohl(a->s6_addr32[i]) < ntohl(b->s6_addr32[i]); } return 0; @@ -75,8 +73,8 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par) bool m; if (info->flags & IPRANGE_SRC) { - m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0; - m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0; + m = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6); + m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr); m ^= !!(info->flags & IPRANGE_SRC_INV); if (m) { pr_debug("src IP %pI6 NOT in range %s%pI6-%pI6\n", @@ -88,8 +86,8 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par) } } if (info->flags & IPRANGE_DST) { - m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0; - m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0; + m = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6); + m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr); m ^= !!(info->flags & IPRANGE_DST_INV); if (m) { pr_debug("dst IP %pI6 NOT in range %s%pI6-%pI6\n", diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f83cb370292b..1781d99145e2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -519,7 +519,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) security_netlink_recv(skb, CAP_NET_ADMIN)) return -EPERM; - if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + if (nlh->nlmsg_flags & NLM_F_DUMP) { if (ops->dumpit == NULL) return -EOPNOTSUPP; diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index eaf765876458..7fce6dfd2180 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -18,7 +18,7 @@ config RFKILL_LEDS default y config RFKILL_INPUT - bool "RF switch input support" if EMBEDDED + bool "RF switch input support" if EXPERT depends on RFKILL depends on INPUT = y || RFKILL = INPUT - default y if !EMBEDDED + default y if !EXPERT diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e318f458713e..8c19b6e3201e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -217,6 +217,17 @@ config NET_SCH_MQPRIO If unsure, say N. +config NET_SCH_CHOKE + tristate "CHOose and Keep responsive flow scheduler (CHOKE)" + help + Say Y here if you want to use the CHOKe packet scheduler (CHOose + and Keep for responsive flows, CHOose and Kill for unresponsive + flows). This is a variation of RED which trys to penalize flows + that monopolize the queue. + + To compile this code as a module, choose M here: the + module will be called sch_choke. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 26ce681a2c60..06c6cdfd1948 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -33,6 +33,8 @@ obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o +obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o + obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 36ac0ec81ce0..150741579408 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -398,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) return stab; } +static void stab_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct qdisc_size_table, rcu)); +} + void qdisc_put_stab(struct qdisc_size_table *tab) { if (!tab) @@ -407,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (--tab->refcnt == 0) { list_del(&tab->list); - kfree(tab); + call_rcu_bh(&tab->rcu, stab_kfree_rcu); } spin_unlock(&qdisc_stab_lock); @@ -430,7 +435,7 @@ nla_put_failure: return -1; } -void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -456,7 +461,7 @@ out: pkt_len = 1; qdisc_skb_cb(skb)->pkt_len = pkt_len; } -EXPORT_SYMBOL(qdisc_calculate_pkt_len); +EXPORT_SYMBOL(__qdisc_calculate_pkt_len); void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) { @@ -473,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - wd->qdisc->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(wd->qdisc); __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; @@ -495,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) &qdisc_root_sleeping(wd->qdisc)->state)) return; - wd->qdisc->flags |= TCQ_F_THROTTLED; + qdisc_throttled(wd->qdisc); time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); @@ -505,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { hrtimer_cancel(&wd->timer); - wd->qdisc->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(wd->qdisc); } EXPORT_SYMBOL(qdisc_watchdog_cancel); @@ -835,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, err = PTR_ERR(stab); goto err_out4; } - sch->stab = stab; + rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { spinlock_t *root_lock; @@ -875,7 +880,7 @@ err_out4: * Any broken qdiscs that would require a ops->reset() here? * The qdisc was never in action so it shouldn't be necessary. */ - qdisc_put_stab(sch->stab); + qdisc_put_stab(rtnl_dereference(sch->stab)); if (ops->destroy) ops->destroy(sch); goto err_out3; @@ -883,7 +888,7 @@ err_out4: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - struct qdisc_size_table *stab = NULL; + struct qdisc_size_table *ostab, *stab = NULL; int err = 0; if (tca[TCA_OPTIONS]) { @@ -900,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) return PTR_ERR(stab); } - qdisc_put_stab(sch->stab); - sch->stab = stab; + ostab = rtnl_dereference(sch->stab); + rcu_assign_pointer(sch->stab, stab); + qdisc_put_stab(ostab); if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator @@ -1180,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; + struct qdisc_size_table *stab; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); @@ -1195,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; - if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + stab = rtnl_dereference(q->stab); + if (stab && qdisc_dump_stab(skb, stab) < 0) goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4aaf44c95c52..24d94c097b35 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -351,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) { int toplevel = q->toplevel; - if (toplevel > cl->level && !(cl->q->flags & TCQ_F_THROTTLED)) { + if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { psched_time_t now; psched_tdiff_t incr; @@ -391,7 +391,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, cl->q); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; - qdisc_bstats_update(sch, skb); cbq_mark_toplevel(q, cl); if (!cl->next_alive) cbq_activate_class(cl); @@ -625,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); } - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); __netif_schedule(qdisc_root(sch)); return HRTIMER_NORESTART; } @@ -650,7 +649,6 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) ret = qdisc_enqueue(skb, cl->q); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; - qdisc_bstats_update(sch, skb); if (!cl->next_alive) cbq_activate_class(cl); return 0; @@ -973,8 +971,9 @@ cbq_dequeue(struct Qdisc *sch) skb = cbq_dequeue_1(sch); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); return skb; } diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c new file mode 100644 index 000000000000..ee1e2090eebe --- /dev/null +++ b/net/sched/sch_choke.c @@ -0,0 +1,677 @@ +/* + * net/sched/sch_choke.c CHOKE scheduler + * + * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com> + * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/reciprocal_div.h> +#include <linux/vmalloc.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> +#include <net/red.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> + +/* + CHOKe stateless AQM for fair bandwidth allocation + ================================================= + + CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for + unresponsive flows) is a variant of RED that penalizes misbehaving flows but + maintains no flow state. The difference from RED is an additional step + during the enqueuing process. If average queue size is over the + low threshold (qmin), a packet is chosen at random from the queue. + If both the new and chosen packet are from the same flow, both + are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it + needs to access packets in queue randomly. It has a minimal class + interface to allow overriding the builtin flow classifier with + filters. + + Source: + R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless + Active Queue Management Scheme for Approximating Fair Bandwidth Allocation", + IEEE INFOCOM, 2000. + + A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial + Characteristics", IEEE/ACM Transactions on Networking, 2004 + + */ + +/* Upper bound on size of sk_buff table (packets) */ +#define CHOKE_MAX_QUEUE (128*1024 - 1) + +struct choke_sched_data { +/* Parameters */ + u32 limit; + unsigned char flags; + + struct red_parms parms; + +/* Variables */ + struct tcf_proto *filter_list; + struct { + u32 prob_drop; /* Early probability drops */ + u32 prob_mark; /* Early probability marks */ + u32 forced_drop; /* Forced drops, qavg > max_thresh */ + u32 forced_mark; /* Forced marks, qavg > max_thresh */ + u32 pdrop; /* Drops due to queue limits */ + u32 other; /* Drops due to drop() calls */ + u32 matched; /* Drops to flow match */ + } stats; + + unsigned int head; + unsigned int tail; + + unsigned int tab_mask; /* size - 1 */ + + struct sk_buff **tab; +}; + +/* deliver a random number between 0 and N - 1 */ +static u32 random_N(unsigned int N) +{ + return reciprocal_divide(random32(), N); +} + +/* number of elements in queue including holes */ +static unsigned int choke_len(const struct choke_sched_data *q) +{ + return (q->tail - q->head) & q->tab_mask; +} + +/* Is ECN parameter configured */ +static int use_ecn(const struct choke_sched_data *q) +{ + return q->flags & TC_RED_ECN; +} + +/* Should packets over max just be dropped (versus marked) */ +static int use_harddrop(const struct choke_sched_data *q) +{ + return q->flags & TC_RED_HARDDROP; +} + +/* Move head pointer forward to skip over holes */ +static void choke_zap_head_holes(struct choke_sched_data *q) +{ + do { + q->head = (q->head + 1) & q->tab_mask; + if (q->head == q->tail) + break; + } while (q->tab[q->head] == NULL); +} + +/* Move tail pointer backwards to reuse holes */ +static void choke_zap_tail_holes(struct choke_sched_data *q) +{ + do { + q->tail = (q->tail - 1) & q->tab_mask; + if (q->head == q->tail) + break; + } while (q->tab[q->tail] == NULL); +} + +/* Drop packet from queue array by creating a "hole" */ +static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb = q->tab[idx]; + + q->tab[idx] = NULL; + + if (idx == q->head) + choke_zap_head_holes(q); + if (idx == q->tail) + choke_zap_tail_holes(q); + + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_drop(skb, sch); + qdisc_tree_decrease_qlen(sch, 1); + --sch->q.qlen; +} + +/* + * Compare flow of two packets + * Returns true only if source and destination address and port match. + * false for special cases + */ +static bool choke_match_flow(struct sk_buff *skb1, + struct sk_buff *skb2) +{ + int off1, off2, poff; + const u32 *ports1, *ports2; + u8 ip_proto; + __u32 hash1; + + if (skb1->protocol != skb2->protocol) + return false; + + /* Use hash value as quick check + * Assumes that __skb_get_rxhash makes IP header and ports linear + */ + hash1 = skb_get_rxhash(skb1); + if (!hash1 || hash1 != skb_get_rxhash(skb2)) + return false; + + /* Probably match, but be sure to avoid hash collisions */ + off1 = skb_network_offset(skb1); + off2 = skb_network_offset(skb2); + + switch (skb1->protocol) { + case __constant_htons(ETH_P_IP): { + const struct iphdr *ip1, *ip2; + + ip1 = (const struct iphdr *) (skb1->data + off1); + ip2 = (const struct iphdr *) (skb2->data + off2); + + ip_proto = ip1->protocol; + if (ip_proto != ip2->protocol || + ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr) + return false; + + if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET)) + ip_proto = 0; + off1 += ip1->ihl * 4; + off2 += ip2->ihl * 4; + break; + } + + case __constant_htons(ETH_P_IPV6): { + const struct ipv6hdr *ip1, *ip2; + + ip1 = (const struct ipv6hdr *) (skb1->data + off1); + ip2 = (const struct ipv6hdr *) (skb2->data + off2); + + ip_proto = ip1->nexthdr; + if (ip_proto != ip2->nexthdr || + ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) || + ipv6_addr_cmp(&ip1->daddr, &ip2->daddr)) + return false; + off1 += 40; + off2 += 40; + } + + default: /* Maybe compare MAC header here? */ + return false; + } + + poff = proto_ports_offset(ip_proto); + if (poff < 0) + return true; + + off1 += poff; + off2 += poff; + + ports1 = (__force u32 *)(skb1->data + off1); + ports2 = (__force u32 *)(skb2->data + off2); + return *ports1 == *ports2; +} + +static inline void choke_set_classid(struct sk_buff *skb, u16 classid) +{ + *(unsigned int *)(qdisc_skb_cb(skb)->data) = classid; +} + +static u16 choke_get_classid(const struct sk_buff *skb) +{ + return *(unsigned int *)(qdisc_skb_cb(skb)->data); +} + +/* + * Classify flow using either: + * 1. pre-existing classification result in skb + * 2. fast internal classification + * 3. use TC filter based classification + */ +static bool choke_classify(struct sk_buff *skb, + struct Qdisc *sch, int *qerr) + +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct tcf_result res; + int result; + + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return false; + } +#endif + choke_set_classid(skb, TC_H_MIN(res.classid)); + return true; + } + + return false; +} + +/* + * Select a packet at random from queue + * HACK: since queue can have holes from previous deletion; retry several + * times to find a random skb but then just give up and return the head + * Will return NULL if queue is empty (q->head == q->tail) + */ +static struct sk_buff *choke_peek_random(const struct choke_sched_data *q, + unsigned int *pidx) +{ + struct sk_buff *skb; + int retrys = 3; + + do { + *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask; + skb = q->tab[*pidx]; + if (skb) + return skb; + } while (--retrys > 0); + + return q->tab[*pidx = q->head]; +} + +/* + * Compare new packet with random packet in queue + * returns true if matched and sets *pidx + */ +static bool choke_match_random(const struct choke_sched_data *q, + struct sk_buff *nskb, + unsigned int *pidx) +{ + struct sk_buff *oskb; + + if (q->head == q->tail) + return false; + + oskb = choke_peek_random(q, pidx); + if (q->filter_list) + return choke_get_classid(nskb) == choke_get_classid(oskb); + + return choke_match_flow(oskb, nskb); +} + +static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct red_parms *p = &q->parms; + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + + if (q->filter_list) { + /* If using external classifiers, get result and record it. */ + if (!choke_classify(skb, sch, &ret)) + goto other_drop; /* Packet was eaten by filter */ + } + + /* Compute average queue usage (see RED) */ + p->qavg = red_calc_qavg(p, sch->q.qlen); + if (red_is_idling(p)) + red_end_of_idle_period(p); + + /* Is queue small? */ + if (p->qavg <= p->qth_min) + p->qcount = -1; + else { + unsigned int idx; + + /* Draw a packet at random from queue and compare flow */ + if (choke_match_random(q, skb, &idx)) { + q->stats.matched++; + choke_drop_by_idx(sch, idx); + goto congestion_drop; + } + + /* Queue is large, always mark/drop */ + if (p->qavg > p->qth_max) { + p->qcount = -1; + + sch->qstats.overlimits++; + if (use_harddrop(q) || !use_ecn(q) || + !INET_ECN_set_ce(skb)) { + q->stats.forced_drop++; + goto congestion_drop; + } + + q->stats.forced_mark++; + } else if (++p->qcount) { + if (red_mark_probability(p, p->qavg)) { + p->qcount = 0; + p->qR = red_random(p); + + sch->qstats.overlimits++; + if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + q->stats.prob_mark++; + } + } else + p->qR = red_random(p); + } + + /* Admit new packet */ + if (sch->q.qlen < q->limit) { + q->tab[q->tail] = skb; + q->tail = (q->tail + 1) & q->tab_mask; + ++sch->q.qlen; + sch->qstats.backlog += qdisc_pkt_len(skb); + return NET_XMIT_SUCCESS; + } + + q->stats.pdrop++; + sch->qstats.drops++; + kfree_skb(skb); + return NET_XMIT_DROP; + + congestion_drop: + qdisc_drop(skb, sch); + return NET_XMIT_CN; + + other_drop: + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; +} + +static struct sk_buff *choke_dequeue(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + + if (q->head == q->tail) { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + return NULL; + } + + skb = q->tab[q->head]; + q->tab[q->head] = NULL; + choke_zap_head_holes(q); + --sch->q.qlen; + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_bstats_update(sch, skb); + + return skb; +} + +static unsigned int choke_drop(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + unsigned int len; + + len = qdisc_queue_drop(sch); + if (len > 0) + q->stats.other++; + else { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + } + + return len; +} + +static void choke_reset(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + red_restart(&q->parms); +} + +static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { + [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) }, + [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE }, +}; + + +static void choke_free(void *addr) +{ + if (addr) { + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); + } +} + +static int choke_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_CHOKE_MAX + 1]; + const struct tc_red_qopt *ctl; + int err; + struct sk_buff **old = NULL; + unsigned int mask; + + if (opt == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy); + if (err < 0) + return err; + + if (tb[TCA_CHOKE_PARMS] == NULL || + tb[TCA_CHOKE_STAB] == NULL) + return -EINVAL; + + ctl = nla_data(tb[TCA_CHOKE_PARMS]); + + if (ctl->limit > CHOKE_MAX_QUEUE) + return -EINVAL; + + mask = roundup_pow_of_two(ctl->limit + 1) - 1; + if (mask != q->tab_mask) { + struct sk_buff **ntab; + + ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); + if (!ntab) + ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); + if (!ntab) + return -ENOMEM; + + sch_tree_lock(sch); + old = q->tab; + if (old) { + unsigned int oqlen = sch->q.qlen, tail = 0; + + while (q->head != q->tail) { + struct sk_buff *skb = q->tab[q->head]; + + q->head = (q->head + 1) & q->tab_mask; + if (!skb) + continue; + if (tail < mask) { + ntab[tail++] = skb; + continue; + } + sch->qstats.backlog -= qdisc_pkt_len(skb); + --sch->q.qlen; + qdisc_drop(skb, sch); + } + qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); + q->head = 0; + q->tail = tail; + } + + q->tab_mask = mask; + q->tab = ntab; + } else + sch_tree_lock(sch); + + q->flags = ctl->flags; + q->limit = ctl->limit; + + red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, + ctl->Plog, ctl->Scell_log, + nla_data(tb[TCA_CHOKE_STAB])); + + if (q->head == q->tail) + red_end_of_idle_period(&q->parms); + + sch_tree_unlock(sch); + choke_free(old); + return 0; +} + +static int choke_init(struct Qdisc *sch, struct nlattr *opt) +{ + return choke_change(sch, opt); +} + +static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct nlattr *opts = NULL; + struct tc_red_qopt opt = { + .limit = q->limit, + .flags = q->flags, + .qth_min = q->parms.qth_min >> q->parms.Wlog, + .qth_max = q->parms.qth_max >> q->parms.Wlog, + .Wlog = q->parms.Wlog, + .Plog = q->parms.Plog, + .Scell_log = q->parms.Scell_log, + }; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt); + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -EMSGSIZE; +} + +static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct tc_choke_xstats st = { + .early = q->stats.prob_drop + q->stats.forced_drop, + .marked = q->stats.prob_mark + q->stats.forced_mark, + .pdrop = q->stats.pdrop, + .other = q->stats.other, + .matched = q->stats.matched, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static void choke_destroy(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + choke_free(q->tab); +} + +static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg) +{ + return NULL; +} + +static unsigned long choke_get(struct Qdisc *sch, u32 classid) +{ + return 0; +} + +static void choke_put(struct Qdisc *q, unsigned long cl) +{ +} + +static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return 0; +} + +static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static int choke_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + if (!arg->stop) { + if (arg->fn(sch, 1, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops choke_class_ops = { + .leaf = choke_leaf, + .get = choke_get, + .put = choke_put, + .tcf_chain = choke_find_tcf, + .bind_tcf = choke_bind, + .unbind_tcf = choke_put, + .dump = choke_dump_class, + .walk = choke_walk, +}; + +static struct sk_buff *choke_peek_head(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + return (q->head != q->tail) ? q->tab[q->head] : NULL; +} + +static struct Qdisc_ops choke_qdisc_ops __read_mostly = { + .id = "choke", + .priv_size = sizeof(struct choke_sched_data), + + .enqueue = choke_enqueue, + .dequeue = choke_dequeue, + .peek = choke_peek_head, + .drop = choke_drop, + .init = choke_init, + .destroy = choke_destroy, + .reset = choke_reset, + .change = choke_change, + .dump = choke_dump, + .dump_stats = choke_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init choke_module_init(void) +{ + return register_qdisc(&choke_qdisc_ops); +} + +static void __exit choke_module_exit(void) +{ + unregister_qdisc(&choke_qdisc_ops); +} + +module_init(choke_module_init) +module_exit(choke_module_exit) + +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index de55e642eafc..6b7fe4a84f13 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -376,7 +376,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) } bstats_update(&cl->bstats, skb); - qdisc_bstats_update(sch, skb); sch->q.qlen++; return err; @@ -403,6 +402,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) skb = qdisc_dequeue_peeked(cl->qdisc); if (cl->qdisc->q.qlen == 0) list_del(&cl->alist); + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 4970d56b4aa7..2c790204d042 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -260,7 +260,6 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) return err; } - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -283,6 +282,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) if (skb == NULL) return NULL; + qdisc_bstats_update(sch, skb); sch->q.qlen--; index = skb->tc_index & (p->indices - 1); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b3075f8a196b..be33f9ddf9dd 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -45,17 +45,14 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - struct sk_buff *skb_head; struct fifo_sched_data *q = qdisc_priv(sch); if (likely(skb_queue_len(&sch->q) < q->limit)) return qdisc_enqueue_tail(skb, sch); /* queue full, remove one skb to fulfill the limit */ - skb_head = qdisc_dequeue_head(sch); + __qdisc_queue_drop_head(sch, &sch->q); sch->qstats.drops++; - kfree_skb(skb_head); - qdisc_enqueue_tail(skb, sch); return NET_XMIT_CN; @@ -64,11 +61,13 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int fifo_init(struct Qdisc *sch, struct nlattr *opt) { struct fifo_sched_data *q = qdisc_priv(sch); + bool bypass; + bool is_bfifo = sch->ops == &bfifo_qdisc_ops; if (opt == NULL) { u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; - if (sch->ops == &bfifo_qdisc_ops) + if (is_bfifo) limit *= psched_mtu(qdisc_dev(sch)); q->limit = limit; @@ -81,6 +80,15 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; } + if (is_bfifo) + bypass = q->limit >= psched_mtu(qdisc_dev(sch)); + else + bypass = q->limit >= 1; + + if (bypass) + sch->flags |= TCQ_F_CAN_BYPASS; + else + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2f1cb62130da..0da09d508737 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -527,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) skb_queue_head_init(band2list(priv, prio)); + /* Can by-pass the queue discipline */ + qdisc->flags |= TCQ_F_CAN_BYPASS; return 0; } @@ -632,7 +634,7 @@ void qdisc_destroy(struct Qdisc *qdisc) #ifdef CONFIG_NET_SCHED qdisc_list_del(qdisc); - qdisc_put_stab(qdisc->stab); + qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); if (ops->reset) @@ -691,9 +693,6 @@ static void attach_one_default_qdisc(struct net_device *dev, netdev_info(dev, "activation failed\n"); return; } - - /* Can by-pass the queue discipline for default qdisc */ - qdisc->flags |= TCQ_F_CAN_BYPASS; } dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index dea4009615f9..6488e6425652 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1598,7 +1598,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) set_active(cl, qdisc_pkt_len(skb)); bstats_update(&cl->bstats, skb); - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1664,7 +1663,8 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3e86fd3a1b78..e1429a85091f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -581,7 +581,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } sch->q.qlen++; - qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -856,7 +855,7 @@ next: static struct sk_buff *htb_dequeue(struct Qdisc *sch) { - struct sk_buff *skb = NULL; + struct sk_buff *skb; struct htb_sched *q = qdisc_priv(sch); int level; psched_time_t next_event; @@ -865,7 +864,9 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) /* try to dequeue direct packets as high prio (!) to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); if (skb != NULL) { - sch->flags &= ~TCQ_F_THROTTLED; +ok: + qdisc_bstats_update(sch, skb); + qdisc_unthrottled(sch); sch->q.qlen--; return skb; } @@ -899,11 +900,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) m |= 1 << prio; skb = htb_dequeue_tree(q, prio, level); - if (likely(skb != NULL)) { - sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; - goto fin; - } + if (likely(skb != NULL)) + goto ok; } } sch->qstats.overlimits++; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index ecc302f4d2a1..ec5cbc848963 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) TC_H_MIN(ntx + 1))); if (qdisc == NULL) goto err; - qdisc->flags |= TCQ_F_CAN_BYPASS; priv->qdiscs[ntx] = qdisc; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 8620c65f480a..effd4ee0e880 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -130,7 +130,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) err = -ENOMEM; goto err; } - qdisc->flags |= TCQ_F_CAN_BYPASS; priv->qdiscs[i] = qdisc; } @@ -216,7 +215,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); - struct tc_mqprio_qopt opt; + struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; unsigned int i; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 820f2a7ca14d..edc1950e0e77 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -83,7 +83,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -112,6 +111,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) qdisc = q->queues[q->curband]; skb = qdisc->dequeue(qdisc); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c2bbbe60d544..64f0d3293b49 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -240,7 +240,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (likely(ret == NET_XMIT_SUCCESS)) { sch->q.qlen++; - qdisc_bstats_update(sch, skb); } else if (net_xmit_drop_count(ret)) { sch->qstats.drops++; } @@ -266,7 +265,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - if (sch->flags & TCQ_F_THROTTLED) + if (qdisc_is_throttled(sch)) return NULL; skb = q->qdisc->ops->peek(q->qdisc); @@ -289,6 +288,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) skb->tstamp.tv64 = 0; #endif pr_debug("netem_dequeue: return skb=%p\n", skb); + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } @@ -476,7 +476,6 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) __skb_queue_after(list, skb, nskb); sch->qstats.backlog += qdisc_pkt_len(nskb); - qdisc_bstats_update(sch, nskb); return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3bea31e101b5..2a318f2dc3e5 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -83,7 +83,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -115,6 +114,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) struct Qdisc *qdisc = q->queues[prio]; struct sk_buff *skb = qdisc->dequeue(qdisc); if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 689157555fa4..6649463da1b6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -93,7 +93,6 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_bstats_update(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -113,11 +112,13 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) struct Qdisc *child = q->qdisc; skb = child->dequeue(child); - if (skb) + if (skb) { + qdisc_bstats_update(sch, skb); sch->q.qlen--; - else if (!red_is_idling(&q->parms)) - red_start_of_idle_period(&q->parms); - + } else { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + } return skb; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 54a36f43a1f1..c2e628dfaacc 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -21,6 +21,7 @@ #include <linux/skbuff.h> #include <linux/jhash.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/ip.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -76,7 +77,8 @@ #define SFQ_DEPTH 128 /* max number of packets per flow */ #define SFQ_SLOTS 128 /* max number of flows */ #define SFQ_EMPTY_SLOT 255 -#define SFQ_HASH_DIVISOR 1024 +#define SFQ_DEFAULT_HASH_DIVISOR 1024 + /* We use 16 bits to store allot, and want to handle packets up to 64K * Scale allot by 8 (1<<3) so that no overflow occurs. */ @@ -112,7 +114,7 @@ struct sfq_sched_data { int perturb_period; unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ int limit; - + unsigned int divisor; /* number of slots in hash table */ /* Variables */ struct tcf_proto *filter_list; struct timer_list perturb_timer; @@ -120,7 +122,7 @@ struct sfq_sched_data { sfq_index cur_depth; /* depth of longest slot */ unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct sfq_slot *tail; /* current slot in round */ - sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ + sfq_index *ht; /* Hash table (divisor slots) */ struct sfq_slot slots[SFQ_SLOTS]; struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ }; @@ -137,7 +139,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { - return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); + return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1); } static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) @@ -201,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && - TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) + TC_H_MIN(skb->priority) <= q->divisor) return TC_H_MIN(skb->priority); if (!q->filter_list) @@ -219,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return 0; } #endif - if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) + if (TC_H_MIN(res.classid) <= q->divisor) return TC_H_MIN(res.classid); } return 0; @@ -400,10 +402,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->tail = slot; slot->allot = q->scaled_quantum; } - if (++sch->q.qlen <= q->limit) { - qdisc_bstats_update(sch, skb); + if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS; - } sfq_drop(sch); return NET_XMIT_CN; @@ -443,6 +443,7 @@ next_slot: } skb = slot_dequeue_head(slot); sfq_dec(q, a); + qdisc_bstats_update(sch, skb); sch->q.qlen--; sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -490,13 +491,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; + if (ctl->divisor && + (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) + return -EINVAL; + sch_tree_lock(sch); q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); - + if (ctl->divisor) + q->divisor = ctl->divisor; qlen = sch->q.qlen; while (sch->q.qlen > q->limit) sfq_drop(sch); @@ -514,15 +520,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) static int sfq_init(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); + size_t sz; int i; q->perturb_timer.function = sfq_perturbation; q->perturb_timer.data = (unsigned long)sch; init_timer_deferrable(&q->perturb_timer); - for (i = 0; i < SFQ_HASH_DIVISOR; i++) - q->ht[i] = SFQ_EMPTY_SLOT; - for (i = 0; i < SFQ_DEPTH; i++) { q->dep[i].next = i + SFQ_SLOTS; q->dep[i].prev = i + SFQ_SLOTS; @@ -531,6 +535,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->limit = SFQ_DEPTH - 1; q->cur_depth = 0; q->tail = NULL; + q->divisor = SFQ_DEFAULT_HASH_DIVISOR; if (opt == NULL) { q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); @@ -542,10 +547,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) return err; } + sz = sizeof(q->ht[0]) * q->divisor; + q->ht = kmalloc(sz, GFP_KERNEL); + if (!q->ht && sz > PAGE_SIZE) + q->ht = vmalloc(sz); + if (!q->ht) + return -ENOMEM; + for (i = 0; i < q->divisor; i++) + q->ht[i] = SFQ_EMPTY_SLOT; + for (i = 0; i < SFQ_SLOTS; i++) { slot_queue_init(&q->slots[i]); sfq_link(q, i); } + if (q->limit >= 1) + sch->flags |= TCQ_F_CAN_BYPASS; + else + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } @@ -556,6 +574,10 @@ static void sfq_destroy(struct Qdisc *sch) tcf_destroy_chain(&q->filter_list); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); + if (is_vmalloc_addr(q->ht)) + vfree(q->ht); + else + kfree(q->ht); } static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -568,7 +590,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.perturb_period = q->perturb_period / HZ; opt.limit = q->limit; - opt.divisor = SFQ_HASH_DIVISOR; + opt.divisor = q->divisor; opt.flows = q->limit; NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); @@ -593,6 +615,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid) static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { + /* we cannot bypass queue discipline anymore */ + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } @@ -646,7 +670,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) if (arg->stop) return; - for (i = 0; i < SFQ_HASH_DIVISOR; i++) { + for (i = 0; i < q->divisor; i++) { if (q->ht[i] == SFQ_EMPTY_SLOT || arg->count < arg->skip) { arg->count++; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 475edfb69c22..1dcfb5223a86 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -133,7 +133,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) } sch->q.qlen++; - qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -185,7 +184,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) q->tokens = toks; q->ptokens = ptoks; sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); return skb; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 64c071ded0f4..45cd30098e34 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -85,7 +85,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->q.qlen < dev->tx_queue_len) { __skb_queue_tail(&q->q, skb); - qdisc_bstats_update(sch, skb); return NET_XMIT_SUCCESS; } @@ -109,6 +108,8 @@ teql_dequeue(struct Qdisc *sch) dat->m->slaves = sch; netif_wake_queue(m); } + } else { + qdisc_bstats_update(sch, skb); } sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; return skb; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a09b0dd25f50..8e02550ff3e8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3428,7 +3428,7 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); break; - case SCTP_DELAYED_ACK: + case SCTP_DELAYED_SACK: retval = sctp_setsockopt_delayed_ack(sk, optval, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: @@ -5333,7 +5333,7 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_peer_addr_params(sk, len, optval, optlen); break; - case SCTP_DELAYED_ACK: + case SCTP_DELAYED_SACK: retval = sctp_getsockopt_delayed_ack(sk, len, optval, optlen); break; diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index d0ee29063e5d..1f1ef70f34f2 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -95,7 +95,7 @@ config CFG80211_DEBUGFS If unsure, say N. config CFG80211_INTERNAL_REGDB - bool "use statically compiled regulatory rules database" if EMBEDDED + bool "use statically compiled regulatory rules database" if EXPERT default n depends on CFG80211 ---help--- diff --git a/net/wireless/core.c b/net/wireless/core.c index e9a5f8ca4c27..fe01de29bfe8 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -718,13 +718,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->ps = false; /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; - if (rdev->ops->set_power_mgmt) - if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, - wdev->ps, - wdev->ps_timeout)) { - /* assume this means it's off */ - wdev->ps = false; - } if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; @@ -813,6 +806,19 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); + + /* + * Configure power management to the driver here so that its + * correctly set also after interface type changes etc. + */ + if (wdev->iftype == NL80211_IFTYPE_STATION && + rdev->ops->set_power_mgmt) + if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, + wdev->ps, + wdev->ps_timeout)) { + /* assume this means it's off */ + wdev->ps = false; + } break; case NETDEV_UNREGISTER: /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9b62710891a2..864ddfbeff2f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2718,7 +2718,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) - goto nla_put_failure; + goto out; pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); if (!pinfoattr) goto nla_put_failure; @@ -2759,6 +2759,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_failure: genlmsg_cancel(msg, hdr); + out: nlmsg_free(msg); return -ENOBUFS; } @@ -2954,7 +2955,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, NL80211_CMD_GET_REG); if (!hdr) - goto nla_put_failure; + goto put_failure; NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, cfg80211_regdomain->alpha2); @@ -3001,6 +3002,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_put_failure: genlmsg_cancel(msg, hdr); +put_failure: nlmsg_free(msg); err = -EMSGSIZE; out: diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 37693b6ef23a..c565689f0b9f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1801,9 +1801,9 @@ void regulatory_hint_disconnect(void) static bool freq_is_chan_12_13_14(u16 freq) { - if (freq == ieee80211_channel_to_frequency(12) || - freq == ieee80211_channel_to_frequency(13) || - freq == ieee80211_channel_to_frequency(14)) + if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ)) return true; return false; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 7620ae2fcf18..6a750bc6bcfe 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -29,29 +29,37 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_get_response_rate); -int ieee80211_channel_to_frequency(int chan) +int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { - if (chan < 14) - return 2407 + chan * 5; - - if (chan == 14) - return 2484; - - /* FIXME: 802.11j 17.3.8.3.2 */ - return (chan + 1000) * 5; + /* see 802.11 17.3.8.3.2 and Annex J + * there are overlapping channel numbers in 5GHz and 2GHz bands */ + if (band == IEEE80211_BAND_5GHZ) { + if (chan >= 182 && chan <= 196) + return 4000 + chan * 5; + else + return 5000 + chan * 5; + } else { /* IEEE80211_BAND_2GHZ */ + if (chan == 14) + return 2484; + else if (chan < 14) + return 2407 + chan * 5; + else + return 0; /* not supported */ + } } EXPORT_SYMBOL(ieee80211_channel_to_frequency); int ieee80211_frequency_to_channel(int freq) { + /* see 802.11 17.3.8.3.2 and Annex J */ if (freq == 2484) return 14; - - if (freq < 2484) + else if (freq < 2484) return (freq - 2407) / 5; - - /* FIXME: 802.11j 17.3.8.3.2 */ - return freq/5 - 1000; + else if (freq >= 4910 && freq <= 4980) + return (freq - 4000) / 5; + else + return (freq - 5000) / 5; } EXPORT_SYMBOL(ieee80211_frequency_to_channel); @@ -159,12 +167,15 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, /* * Disallow pairwise keys with non-zero index unless it's WEP - * (because current deployments use pairwise WEP keys with - * non-zero indizes but 802.11i clearly specifies to use zero) + * or a vendor specific cipher (because current deployments use + * pairwise WEP keys with non-zero indices and for vendor specific + * ciphers this should be validated in the driver or hardware level + * - but 802.11i clearly specifies to use zero) */ if (pairwise && key_idx && - params->cipher != WLAN_CIPHER_SUITE_WEP40 && - params->cipher != WLAN_CIPHER_SUITE_WEP104) + ((params->cipher == WLAN_CIPHER_SUITE_TKIP) || + (params->cipher == WLAN_CIPHER_SUITE_CCMP) || + (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC))) return -EINVAL; switch (params->cipher) { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 3e5dbd4e4cd5..7f1f4ec49041 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -267,9 +267,12 @@ int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq) * -EINVAL for impossible things. */ if (freq->e == 0) { + enum ieee80211_band band = IEEE80211_BAND_2GHZ; if (freq->m < 0) return 0; - return ieee80211_channel_to_frequency(freq->m); + if (freq->m > 14) + band = IEEE80211_BAND_5GHZ; + return ieee80211_channel_to_frequency(freq->m, band); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 55187c8f6420..406207515b5e 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -27,9 +27,19 @@ #include <net/sock.h> #include <net/x25.h> -/* - * Parse a set of facilities into the facilities structures. Unrecognised - * facilities are written to the debug log file. +/** + * x25_parse_facilities - Parse facilities from skb into the facilities structs + * + * @skb: sk_buff to parse + * @facilities: Regular facilites, updated as facilities are found + * @dte_facs: ITU DTE facilities, updated as DTE facilities are found + * @vc_fac_mask: mask is updated with all facilities found + * + * Return codes: + * -1 - Parsing error, caller should drop call and clean up + * 0 - Parse OK, this skb has no facilities + * >0 - Parse OK, returns the length of the facilities header + * */ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask) @@ -62,7 +72,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: if (len < 2) - return 0; + return -1; switch (*p) { case X25_FAC_REVERSE: if((p[1] & 0x81) == 0x81) { @@ -107,7 +117,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_B: if (len < 3) - return 0; + return -1; switch (*p) { case X25_FAC_PACKET_SIZE: facilities->pacsize_in = p[1]; @@ -130,7 +140,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_C: if (len < 4) - return 0; + return -1; printk(KERN_DEBUG "X.25: unknown facility %02X, " "values %02X, %02X, %02X\n", p[0], p[1], p[2], p[3]); @@ -139,18 +149,18 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, break; case X25_FAC_CLASS_D: if (len < p[1] + 2) - return 0; + return -1; switch (*p) { case X25_FAC_CALLING_AE: if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) - return 0; + return -1; dte_facs->calling_len = p[2]; memcpy(dte_facs->calling_ae, &p[3], p[1] - 1); *vc_fac_mask |= X25_MASK_CALLING_AE; break; case X25_FAC_CALLED_AE: if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1) - return 0; + return -1; dte_facs->called_len = p[2]; memcpy(dte_facs->called_ae, &p[3], p[1] - 1); *vc_fac_mask |= X25_MASK_CALLED_AE; diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index f729f022be69..15de65f04719 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -91,10 +91,10 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp { struct x25_address source_addr, dest_addr; int len; + struct x25_sock *x25 = x25_sk(sk); switch (frametype) { case X25_CALL_ACCEPTED: { - struct x25_sock *x25 = x25_sk(sk); x25_stop_timer(sk); x25->condition = 0x00; @@ -113,14 +113,16 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp &dest_addr); if (len > 0) skb_pull(skb, len); + else if (len < 0) + goto out_clear; len = x25_parse_facilities(skb, &x25->facilities, &x25->dte_facilities, &x25->vc_facil_mask); if (len > 0) skb_pull(skb, len); - else - return -1; + else if (len < 0) + goto out_clear; /* * Copy any Call User Data. */ @@ -144,6 +146,12 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp } return 0; + +out_clear: + x25_write_internal(sk, X25_CLEAR_REQUEST); + x25->state = X25_STATE_2; + x25_start_t23timer(sk); + return 0; } /* diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d5e1e0b08890..61291965c5f6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2189,7 +2189,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && - (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { + (nlh->nlmsg_flags & NLM_F_DUMP)) { if (link->dump == NULL) return -EINVAL; |