From 6e23ae2a48750bda407a4a58f52a4865d7308bf5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 19 Nov 2007 18:53:30 -0800 Subject: [NETFILTER]: Introduce NF_INET_ hook values The IPv4 and IPv6 hook values are identical, yet some code tries to figure out the "correct" value by looking at the address family. Introduce NF_INET_* values for both IPv4 and IPv6. The old values are kept in a #ifndef __KERNEL__ section for userspace compatibility. Signed-off-by: Patrick McHardy Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/ip_input.c') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 168c871fcd79..5b8a7603e606 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -268,7 +268,7 @@ int ip_local_deliver(struct sk_buff *skb) return 0; } - return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL, ip_local_deliver_finish); } @@ -442,7 +442,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); inhdr_error: -- cgit v1.2.3 From 8dbde28d9711475adfe0e9c88505e38743cdc2a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Nov 2007 03:32:10 -0800 Subject: [NET]: NET_CLS_ROUTE : convert ip_rt_acct to per_cpu variables ip_rt_acct needs 4096 bytes per cpu to perform some accounting. It is actually allocated as a single huge array [4096*NR_CPUS] (rounded up to a power of two). Converting it to a per cpu variable is wanted to : - Save space on machines where num_possible_cpus() < NR_CPUS - Better NUMA placement (each cpu gets memory on its node) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/ip_input.c | 2 +- net/ipv4/route.c | 15 +++------------ 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'net/ipv4/ip_input.c') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 5b8a7603e606..4068e178d747 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -347,7 +347,7 @@ static int ip_rcv_finish(struct sk_buff *skb) #ifdef CONFIG_NET_CLS_ROUTE if (unlikely(skb->dst->tclassid)) { - struct ip_rt_acct *st = ip_rt_acct + 256*smp_processor_id(); + struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); u32 idx = skb->dst->tclassid; st[idx&0xFF].o_packets++; st[idx&0xFF].o_bytes+=skb->len; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 94ef788a2ac6..a21021bf1409 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2858,12 +2858,10 @@ ctl_table ipv4_route_table[] = { #endif #ifdef CONFIG_NET_CLS_ROUTE -struct ip_rt_acct *ip_rt_acct; - -/* This code sucks. But you should have seen it before! --RR */ +struct ip_rt_acct *ip_rt_acct __read_mostly; /* IP route accounting ptr for this logical cpu number. 
*/ -#define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256) +#define IP_RT_ACCT_CPU(cpu) (per_cpu_ptr(ip_rt_acct, cpu)) #ifdef CONFIG_PROC_FS static int ip_rt_acct_read(char *buffer, char **start, off_t offset, @@ -2923,16 +2921,9 @@ int __init ip_rt_init(void) (jiffies ^ (jiffies >> 7))); #ifdef CONFIG_NET_CLS_ROUTE - { - int order; - for (order = 0; - (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++) - /* NOTHING */; - ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order); + ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); if (!ip_rt_acct) panic("IP: failed to allocate ip_rt_acct\n"); - memset(ip_rt_acct, 0, PAGE_SIZE << order); - } #endif ipv4_dst_ops.kmem_cachep = -- cgit v1.2.3 From 7bc54c90307b4bc3d7fb2ffd6ad8fbda0671a45e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 19 Nov 2007 22:35:07 -0800 Subject: [IPv4] RAW: Compact the API for the kernel The raw sockets functions are explicitly used from inside the kernel in two places: 1. in ip_local_deliver_finish to intercept skb-s 2. in icmp_error For these purposes, many functions and even data structures that are naturally internal to the raw protocol are exported. Compact the API to two functions and hide all the others (including hash table and rwlock) inside net/ipv4/raw.c Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/net/raw.h | 19 +++---------------- net/ipv4/icmp.c | 15 +-------------- net/ipv4/ip_input.c | 16 ++++------------ net/ipv4/raw.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 56 insertions(+), 47 deletions(-) (limited to 'net/ipv4/ip_input.c') diff --git a/include/net/raw.h b/include/net/raw.h index e4af59781949..7fc3c770f170 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -22,23 +22,10 @@ extern struct proto raw_prot; -extern void raw_err(struct sock *, struct sk_buff *, u32 info); -extern int raw_rcv(struct sock *, struct sk_buff *); - -/* Note: v4 ICMP wants to get at this stuff, if you change the - * hashing mechanism, make sure you update icmp.c as well. - */ -#define RAWV4_HTABLE_SIZE MAX_INET_PROTOS -extern struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE]; - -extern rwlock_t raw_v4_lock; +void raw_icmp_error(struct sk_buff *, int, u32); +int raw_local_deliver(struct sk_buff *, int); - -extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, - __be32 raddr, __be32 laddr, - int dif); - -extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +extern int raw_rcv(struct sock *, struct sk_buff *); #ifdef CONFIG_PROC_FS extern int raw_proc_init(void); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 82baea026484..13d74598d3e4 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -603,7 +603,6 @@ static void icmp_unreach(struct sk_buff *skb) struct icmphdr *icmph; int hash, protocol; struct net_protocol *ipprot; - struct sock *raw_sk; u32 info = 0; /* @@ -697,21 +696,9 @@ static void icmp_unreach(struct sk_buff *skb) /* * Deliver ICMP message to raw sockets. Pretty useless feature? 
*/ + raw_icmp_error(skb, protocol, info); - /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ hash = protocol & (MAX_INET_PROTOS - 1); - read_lock(&raw_v4_lock); - if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) { - while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, - iph->saddr, - skb->dev->ifindex)) != NULL) { - raw_err(raw_sk, skb, info); - raw_sk = sk_next(raw_sk); - iph = (struct iphdr *)skb->data; - } - } - read_unlock(&raw_v4_lock); - rcu_read_lock(); ipprot = rcu_dereference(inet_protos[hash]); if (ipprot && ipprot->err_handler) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 4068e178d747..65631391d479 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -204,22 +204,14 @@ static int ip_local_deliver_finish(struct sk_buff *skb) rcu_read_lock(); { - /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ int protocol = ip_hdr(skb)->protocol; - int hash; - struct sock *raw_sk; + int hash, raw; struct net_protocol *ipprot; resubmit: - hash = protocol & (MAX_INET_PROTOS - 1); - raw_sk = sk_head(&raw_v4_htable[hash]); - - /* If there maybe a raw socket we must check - if not we - * don't care less - */ - if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash)) - raw_sk = NULL; + raw = raw_local_deliver(skb, protocol); + hash = protocol & (MAX_INET_PROTOS - 1); if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { int ret; @@ -237,7 +229,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) } IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); } else { - if (!raw_sk) { + if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index b80987d2fc55..8a506618b912 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -80,8 +80,10 @@ #include #include -struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE]; -DEFINE_RWLOCK(raw_v4_lock); +#define RAWV4_HTABLE_SIZE 
MAX_INET_PROTOS + +static struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE]; +static DEFINE_RWLOCK(raw_v4_lock); static void raw_v4_hash(struct sock *sk) { @@ -102,7 +104,7 @@ static void raw_v4_unhash(struct sock *sk) write_unlock_bh(&raw_v4_lock); } -struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, +static struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { @@ -150,7 +152,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) * RFC 1122: SHOULD pass TOS value up to the transport layer. * -> It does. And not only TOS, but all IP header. */ -int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) +static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { struct sock *sk; struct hlist_head *head; @@ -182,7 +184,25 @@ out: return delivered; } -void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) +int raw_local_deliver(struct sk_buff *skb, int protocol) +{ + int hash; + struct sock *raw_sk; + + hash = protocol & (RAWV4_HTABLE_SIZE - 1); + raw_sk = sk_head(&raw_v4_htable[hash]); + + /* If there maybe a raw socket we must check - if not we + * don't care less + */ + if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash)) + raw_sk = NULL; + + return raw_sk != NULL; + +} + +static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) { struct inet_sock *inet = inet_sk(sk); const int type = icmp_hdr(skb)->type; @@ -236,6 +256,29 @@ void raw_err (struct sock *sk, struct sk_buff *skb, u32 info) } } +void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) +{ + int hash; + struct sock *raw_sk; + struct iphdr *iph; + + hash = protocol & (RAWV4_HTABLE_SIZE - 1); + + read_lock(&raw_v4_lock); + raw_sk = sk_head(&raw_v4_htable[hash]); + if (raw_sk != NULL) { + iph = (struct iphdr *)skb->data; + while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, + iph->saddr, + skb->dev->ifindex)) != NULL) { + raw_err(raw_sk, skb, info); + raw_sk 
= sk_next(raw_sk); + iph = (struct iphdr *)skb->data; + } + } + read_unlock(&raw_v4_lock); +} + static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) { /* Charge it to the socket. */ -- cgit v1.2.3