diff options
Diffstat (limited to 'net/bridge')
35 files changed, 1475 insertions, 491 deletions
diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 0aefc011b668..40b1ede527ca 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_BRIDGE) += bridge.o bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ br_ioctl.o br_stp.o br_stp_bpdu.o \ - br_stp_if.o br_stp_timer.o br_netlink.o + br_stp_if.o br_stp_timer.o br_netlink.o \ + br_netlink_tunnel.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o @@ -18,7 +19,7 @@ obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o -bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 89a687f3c0a3..90f49a194249 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -19,7 +19,7 @@ #include <linux/list.h> #include <linux/netfilter_bridge.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "br_private.h" #define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ @@ -79,7 +79,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_multicast_flood(mdst, skb, false, true); else br_flood(br, skb, BR_PKT_MULTICAST, false, true); - } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) { + } else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) { br_forward(dst->dst, skb, false, true); } else { br_flood(br, skb, BR_PKT_UNICAST, false, true); @@ -119,6 +119,15 @@ static int br_dev_init(struct net_device *dev) return err; } +static void br_dev_uninit(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + br_multicast_uninit_stats(br); + br_vlan_flush(br); + free_percpu(br->stats); +} + static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -153,8 +162,8 @@ static int br_dev_stop(struct net_device *dev) return 0; } -static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void br_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct net_bridge *br = netdev_priv(dev); struct pcpu_sw_netstats tmp, sum = { 0 }; @@ -178,14 +187,12 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, stats->tx_packets = sum.tx_packets; stats->rx_bytes = sum.rx_bytes; stats->rx_packets = sum.rx_packets; - - return stats; } static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); - if (new_mtu < 68 || new_mtu > br_min_mtu(br)) + if (new_mtu > br_min_mtu(br)) return -EINVAL; dev->mtu = new_mtu; @@ -334,6 +341,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_init = br_dev_init, + .ndo_uninit = br_dev_uninit, .ndo_start_xmit = br_dev_xmit, .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, @@ -349,8 +357,6 @@ static const struct net_device_ops br_netdev_ops = { .ndo_add_slave = br_add_slave, .ndo_del_slave = br_del_slave, .ndo_fix_features = br_fix_features, - .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, - .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, @@ -360,14 +366,6 @@ static const struct net_device_ops br_netdev_ops = { .ndo_features_check = passthru_features_check, }; -static void br_dev_free(struct net_device *dev) -{ - struct net_bridge *br = netdev_priv(dev); - - free_percpu(br->stats); - free_netdev(dev); -} - static struct device_type br_type = { .name = "bridge", }; @@ -380,7 +378,7 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = br_dev_free; + dev->destructor = free_netdev; dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; @@ -409,9 +407,11 @@ void br_dev_setup(struct net_device *dev) br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; - br->ageing_time = BR_DEFAULT_AGEING_TIME; + br->bridge_ageing_time = br->ageing_time = BR_DEFAULT_AGEING_TIME; + dev->max_mtu = ETH_MAX_MTU; br_netfilter_rtable_init(br); br_stp_timer_init(br); br_multicast_init(br); + INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6b43c8c88f19..6e08b7199dd7 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -28,9 +28,6 @@ #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; -static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr, - __u16 vid); static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, @@ -68,7 +65,7 @@ static inline unsigned long hold_time(const struct net_bridge *br) static inline int has_expired(const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb) { - return !fdb->is_static && + return !fdb->is_static && !fdb->added_by_external_learn && time_before_eq(fdb->updated + hold_time(br), jiffies); } @@ -86,6 +83,47 @@ static void fdb_rcu_free(struct rcu_head *head) kmem_cache_free(br_fdb_cache, ent); } +static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, + const unsigned char *addr, + __u16 vid) +{ + struct net_bridge_fdb_entry *f; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + hlist_for_each_entry_rcu(f, head, hlist) + if (ether_addr_equal(f->addr.addr, addr) && f->vlan_id == vid) + break; + + return f; +} + +/* requires bridge hash_lock */ +static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, + const unsigned char *addr, + __u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + struct net_bridge_fdb_entry *fdb; + + lockdep_assert_held_once(&br->hash_lock); + + rcu_read_lock(); + fdb = fdb_find_rcu(head, addr, vid); + rcu_read_unlock(); + + return fdb; +} + +struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, + const unsigned char *addr, + __u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + + return fdb_find_rcu(head, addr, vid); +} + /* When a static FDB entry is added, the mac address from the entry is * added to the bridge private HW address list and all required ports * are then updated with the new information. @@ -154,7 +192,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) if (f->added_by_external_learn) fdb_del_external_learn(f); - hlist_del_rcu(&f->hlist); + hlist_del_init_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); } @@ -198,11 +236,10 @@ void br_fdb_find_delete_local(struct net_bridge *br, const struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *f; spin_lock_bh(&br->hash_lock); - f = fdb_find(head, addr, vid); + f = br_fdb_find(br, addr, vid); if (f && f->is_local && !f->added_by_user && f->dst == p) fdb_delete_local(br, p, f); spin_unlock_bh(&br->hash_lock); @@ -266,7 +303,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) spin_lock_bh(&br->hash_lock); /* If old entry was unassociated with any port, then delete it. */ - f = __br_fdb_get(br, br->dev->dev_addr, 0); + f = br_fdb_find(br, br->dev->dev_addr, 0); if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); @@ -281,7 +318,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - f = __br_fdb_get(br, br->dev->dev_addr, v->vid); + f = br_fdb_find(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, v->vid); @@ -290,34 +327,43 @@ out: spin_unlock_bh(&br->hash_lock); } -void br_fdb_cleanup(unsigned long _data) +void br_fdb_cleanup(struct work_struct *work) { - struct net_bridge *br = (struct net_bridge *)_data; + struct net_bridge *br = container_of(work, struct net_bridge, + gc_work.work); unsigned long delay = hold_time(br); - unsigned long next_timer = jiffies + br->ageing_time; + unsigned long work_delay = delay; + unsigned long now = jiffies; int i; - spin_lock(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; struct hlist_node *n; + if (!br->hash[i].first) + continue; + + spin_lock_bh(&br->hash_lock); hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { unsigned long this_timer; + if (f->is_static) continue; if (f->added_by_external_learn) continue; this_timer = f->updated + delay; - if (time_before_eq(this_timer, jiffies)) + if (time_after(this_timer, now)) + work_delay = min(work_delay, this_timer - now); + else fdb_delete(br, f); - else if (time_before(this_timer, next_timer)) - next_timer = this_timer; } + spin_unlock_bh(&br->hash_lock); + cond_resched(); } - spin_unlock(&br->hash_lock); - mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); + /* Cleanup minimum 10 milliseconds apart */ + work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); + mod_delayed_work(system_long_wq, &br->gc_work, work_delay); } /* Completely flush all dynamic entries in forwarding database.*/ @@ -371,26 +417,6 @@ void br_fdb_delete_by_port(struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } -/* No locking or refcounting, assumes caller has rcu_read_lock */ -struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry_rcu(fdb, - &br->hash[br_mac_hash(addr, vid)], hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) { - if (unlikely(has_expired(br, fdb))) - break; - return fdb; - } - } - - return NULL; -} - #if IS_ENABLED(CONFIG_ATM_LANE) /* Interface used by ATM LANE hook to test * if an addr is on some other bridge port */ @@ -405,7 +431,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) if (!port) ret = 0; else { - fdb = __br_fdb_get(port->br, addr, 0); + fdb = br_fdb_find_rcu(port->br, addr, 0); ret = fdb && fdb->dst && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; } @@ -467,34 +493,6 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, return num; } -static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry(fdb, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) - return fdb; - } - return NULL; -} - -static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry_rcu(fdb, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) - return fdb; - } - return NULL; -} - static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, const unsigned char *addr, @@ -528,16 +526,15 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, if (!is_valid_ether_addr(addr)) return -EINVAL; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. */ if (fdb->is_local) return 0; - br_warn(br, "adding interface %s with same address " - "as a received packet\n", - source ? source->dev->name : br->dev->name); + br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n", + source ? source->dev->name : br->dev->name, addr, vid); fdb_delete(br, fdb); } @@ -583,16 +580,18 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { if (net_ratelimit()) - br_warn(br, "received packet on %s with " - "own address as source address\n", - source->dev->name); + br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n", + source->dev->name, addr, vid); } else { + unsigned long now = jiffies; + /* fastpath: update of existing entry */ if (unlikely(source != fdb->dst)) { fdb->dst = source; fdb_modified = true; } - fdb->updated = jiffies; + if (now != fdb->updated) + fdb->updated = now; if (unlikely(added_by_user)) fdb->added_by_user = 1; if (unlikely(fdb_modified)) @@ -600,7 +599,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find(head, addr, vid))) { + if (likely(!fdb_find_rcu(head, addr, vid))) { fdb = fdb_create(head, source, addr, vid, 0, 0); if (fdb) { if (unlikely(added_by_user)) @@ -784,7 +783,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, return -EINVAL; } - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) return -ENOENT; @@ -931,55 +930,30 @@ out: return err; } -static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, - u16 vid) -{ - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; - struct net_bridge_fdb_entry *fdb; - - fdb = fdb_find(head, addr, vid); - if (!fdb) - return -ENOENT; - - fdb_delete(br, fdb); - return 0; -} - -static int __br_fdb_delete_by_addr(struct net_bridge *br, - const unsigned char *addr, u16 vid) -{ - int err; - - spin_lock_bh(&br->hash_lock); - err = fdb_delete_by_addr(br, addr, vid); - spin_unlock_bh(&br->hash_lock); - - return err; -} - -static int fdb_delete_by_addr_and_port(struct net_bridge_port *p, +static int fdb_delete_by_addr_and_port(struct net_bridge *br, + const struct net_bridge_port *p, const u8 *addr, u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr, vlan); + fdb = br_fdb_find(br, addr, vlan); if (!fdb || fdb->dst != p) return -ENOENT; fdb_delete(br, fdb); + return 0; } -static int __br_fdb_delete(struct net_bridge_port *p, +static int __br_fdb_delete(struct net_bridge *br, + const struct net_bridge_port *p, const unsigned char *addr, u16 vid) { int err; - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr_and_port(p, addr, vid); - spin_unlock_bh(&p->br->hash_lock); + spin_lock_bh(&br->hash_lock); + err = fdb_delete_by_addr_and_port(br, p, addr, vid); + spin_unlock_bh(&br->hash_lock); return err; } @@ -992,7 +966,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; struct net_bridge_vlan *v; - struct net_bridge *br = NULL; + struct net_bridge *br; int err; if (dev->priv_flags & IFF_EBRIDGE) { @@ -1006,6 +980,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } vg = nbp_vlan_group(p); + br = p->br; } if (vid) { @@ -1015,30 +990,20 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, vid); - else - err = __br_fdb_delete(p, addr, vid); + err = __br_fdb_delete(br, p, addr, vid); } else { err = -ENOENT; - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, 0); - else - err &= __br_fdb_delete(p, addr, 0); - + err &= __br_fdb_delete(br, p, addr, 0); if (!vg || !vg->num_vlans) - goto out; + return err; list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, v->vid); - else - err &= __br_fdb_delete(p, addr, v->vid); + err &= __br_fdb_delete(br, p, addr, v->vid); } } -out: + return err; } @@ -1109,7 +1074,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (!fdb) { fdb = fdb_create(head, p, addr, vid, 0, 0); if (!fdb) { @@ -1137,15 +1102,13 @@ err_unlock: int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); - head = &br->hash[br_mac_hash(addr, vid)]; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb && fdb->added_by_external_learn) fdb_delete(br, fdb); else diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 7cb41aee4c82..902af6ba481c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -80,7 +80,7 @@ static void __br_forward(const struct net_bridge_port *to, int br_hook; vg = nbp_vlan_group_rcu(to); - skb = br_handle_vlan(to->br, vg, skb); + skb = br_handle_vlan(to->br, to, vg, skb); if (!skb) return; @@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood unicast traffic to ports that turn it off */ if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) continue; + /* Do not flood if mc off, except for traffic we originate */ if (pkt_type == BR_PKT_MULTICAST && - !(p->flags & BR_MCAST_FLOOD)) + !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) continue; /* Do not flood to ports that enable proxy ARP */ @@ -220,6 +221,31 @@ out: } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, + const unsigned char *addr, bool local_orig) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + const unsigned char *src = eth_hdr(skb)->h_source; + + if (!should_deliver(p, skb)) + return; + + /* Even with hairpin, no soliloquies - prevent breaking IPv6 DAD */ + if (skb->dev == p->dev && ether_addr_equal(src, addr)) + return; + + skb = skb_copy(skb, GFP_ATOMIC); + if (!skb) { + dev->stats.tx_dropped++; + return; + } + + if (!is_broadcast_ether_addr(addr)) + memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); + + __br_forward(p, skb, local_orig); +} + /* called with rcu_read_lock */ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, @@ -241,10 +267,20 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : NULL; - port = (unsigned long)lport > (unsigned long)rport ? - lport : rport; + if ((unsigned long)lport > (unsigned long)rport) { + port = lport; + + if (port->flags & BR_MULTICAST_TO_UNICAST) { + maybe_deliver_addr(lport, skb, p->eth_addr, + local_orig); + goto delivered; + } + } else { + port = rport; + } prev = maybe_deliver(prev, port, skb, local_orig); +delivered: if (IS_ERR(prev)) goto out; if (prev == port) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ed0dd3340084..56a2a72e7738 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -311,9 +311,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) br_fdb_delete_by_port(br, NULL, 0, 1); - br_vlan_flush(br); br_multicast_dev_del(br); - del_timer_sync(&br->gc_timer); + cancel_delayed_work_sync(&br->gc_work); br_sysfs_delbr(br->dev); unregister_netdevice_queue(br->dev, head); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 855b72fbe1da..013f2290bfa5 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -21,6 +21,7 @@ #include <linux/export.h> #include <linux/rculist.h> #include "br_private.h" +#include "br_private_tunnel.h" /* Hook for brouter */ br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; @@ -29,6 +30,7 @@ EXPORT_SYMBOL(br_should_route_hook); static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { + br_drop_fake_rtable(skb); return netif_receive_skb(skb); } @@ -57,7 +59,7 @@ static int br_pass_frame_up(struct sk_buff *skb) indev = skb->dev; skb->dev = brdev; - skb = br_handle_vlan(br, vg, skb); + skb = br_handle_vlan(br, NULL, vg, skb); if (!skb) return NET_RX_DROP; /* update the multicast stats if the packet is IGMP/MLD */ @@ -113,7 +115,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, return; } - f = __br_fdb_get(br, n->ha, vid); + f = br_fdb_find_rcu(br, n->ha, vid); if (f && ((p->flags & BR_PROXYARP) || (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, @@ -188,16 +190,19 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } break; case BR_PKT_UNICAST: - dst = __br_fdb_get(br, dest, vid); + dst = br_fdb_find_rcu(br, dest, vid); default: break; } if (dst) { + unsigned long now = jiffies; + if (dst->is_local) return br_pass_frame_up(skb); - dst->used = jiffies; + if (now != dst->used) + dst->used = now; br_forward(dst->dst, skb, local_rcv, false); } else { if (!mcast_hit) @@ -261,6 +266,11 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; p = br_port_get_rcu(skb->dev); + if (p->flags & BR_VLAN_TUNNEL) { + if (br_handle_ingress_vlan_tunnel(skb, p, + nbp_vlan_group_rcu(p))) + goto drop; + } if (unlikely(is_link_local_ether_addr(dest))) { u16 fwd_mask = p->br->group_fwd_mask_required; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index d99b2009771a..7970f8540cbb 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -18,7 +18,7 @@ #include <linux/slab.h> #include <linux/times.h> #include <net/net_namespace.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "br_private.h" static int get_bridge_ifindices(struct net *net, int *indices, int num) @@ -149,7 +149,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) b.hello_timer_value = br_timer_value(&br->hello_timer); b.tcn_timer_value = br_timer_value(&br->tcn_timer); b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); - b.gc_timer_value = br_timer_value(&br->gc_timer); + b.gc_timer_value = br_timer_value(&br->gc_work.timer); rcu_read_unlock(); if (copy_to_user((void __user *)args[1], &b, sizeof(b))) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 7dbc80d01eb0..056e6ac49d8f 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -531,7 +531,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, break; } - p = br_multicast_new_port_group(port, group, *pp, state); + p = br_multicast_new_port_group(port, group, *pp, state, NULL); if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2136e45f5277..faa7261a992f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -25,7 +25,9 @@ #include <linux/slab.h> #include <linux/timer.h> #include <linux/inetdevice.h> +#include <linux/mroute.h> #include <net/ip.h> +#include <net/switchdev.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/mld.h> @@ -42,12 +44,15 @@ static void br_multicast_add_router(struct net_bridge *br, static void br_ip4_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, __be32 group, - __u16 vid); + __u16 vid, + const unsigned char *src); + +static void __del_port_router(struct net_bridge_port *p); #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, const struct in6_addr *group, - __u16 vid); + __u16 vid, const unsigned char *src); #endif unsigned int br_mdb_rehash_seq; @@ -364,13 +369,18 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, __be32 group, u8 *igmp_type) { + struct igmpv3_query *ihv3; + size_t igmp_hdr_size; struct sk_buff *skb; struct igmphdr *ih; struct ethhdr *eth; struct iphdr *iph; + igmp_hdr_size = sizeof(*ih); + if (br->multicast_igmp_version == 3) + igmp_hdr_size = sizeof(*ihv3); skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) + - sizeof(*ih) + 4); + igmp_hdr_size + 4); if (!skb) goto out; @@ -395,7 +405,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, iph->version = 4; iph->ihl = 6; iph->tos = 0xc0; - iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4); + iph->tot_len = htons(sizeof(*iph) + igmp_hdr_size + 4); iph->id = 0; iph->frag_off = htons(IP_DF); iph->ttl = 1; @@ -411,17 +421,37 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, skb_put(skb, 24); skb_set_transport_header(skb, skb->len); - ih = igmp_hdr(skb); *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY; - ih->type = IGMP_HOST_MEMBERSHIP_QUERY; - ih->code = (group ? br->multicast_last_member_interval : - br->multicast_query_response_interval) / - (HZ / IGMP_TIMER_SCALE); - ih->group = group; - ih->csum = 0; - ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); - skb_put(skb, sizeof(*ih)); + switch (br->multicast_igmp_version) { + case 2: + ih = igmp_hdr(skb); + ih->type = IGMP_HOST_MEMBERSHIP_QUERY; + ih->code = (group ? br->multicast_last_member_interval : + br->multicast_query_response_interval) / + (HZ / IGMP_TIMER_SCALE); + ih->group = group; + ih->csum = 0; + ih->csum = ip_compute_csum((void *)ih, sizeof(*ih)); + break; + case 3: + ihv3 = igmpv3_query_hdr(skb); + ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY; + ihv3->code = (group ? br->multicast_last_member_interval : + br->multicast_query_response_interval) / + (HZ / IGMP_TIMER_SCALE); + ihv3->group = group; + ihv3->qqic = br->multicast_query_interval / HZ; + ihv3->nsrcs = 0; + ihv3->resv = 0; + ihv3->suppress = 0; + ihv3->qrv = 2; + ihv3->csum = 0; + ihv3->csum = ip_compute_csum((void *)ihv3, sizeof(*ihv3)); + break; + } + + skb_put(skb, igmp_hdr_size); __skb_pull(skb, sizeof(*eth)); out: @@ -433,15 +463,20 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, const struct in6_addr *grp, u8 *igmp_type) { - struct sk_buff *skb; + struct mld2_query *mld2q; + unsigned long interval; struct ipv6hdr *ip6h; struct mld_msg *mldq; + size_t mld_hdr_size; + struct sk_buff *skb; struct ethhdr *eth; u8 *hopopt; - unsigned long interval; + mld_hdr_size = sizeof(*mldq); + if (br->multicast_mld_version == 2) + mld_hdr_size = sizeof(*mld2q); skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) + - 8 + sizeof(*mldq)); + 8 + mld_hdr_size); if (!skb) goto out; @@ -460,7 +495,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h = ipv6_hdr(skb); *(__force __be32 *)ip6h = htonl(0x60000000); - ip6h->payload_len = htons(8 + sizeof(*mldq)); + ip6h->payload_len = htons(8 + mld_hdr_size); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); @@ -488,26 +523,47 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, /* ICMPv6 */ skb_set_transport_header(skb, skb->len); - mldq = (struct mld_msg *) icmp6_hdr(skb); - interval = ipv6_addr_any(grp) ? br->multicast_query_response_interval : br->multicast_last_member_interval; - *igmp_type = ICMPV6_MGM_QUERY; - mldq->mld_type = ICMPV6_MGM_QUERY; - mldq->mld_code = 0; - mldq->mld_cksum = 0; - mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); - mldq->mld_reserved = 0; - mldq->mld_mca = *grp; - - /* checksum */ - mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, - sizeof(*mldq), IPPROTO_ICMPV6, - csum_partial(mldq, - sizeof(*mldq), 0)); - skb_put(skb, sizeof(*mldq)); + switch (br->multicast_mld_version) { + case 1: + mldq = (struct mld_msg *)icmp6_hdr(skb); + mldq->mld_type = ICMPV6_MGM_QUERY; + mldq->mld_code = 0; + mldq->mld_cksum = 0; + mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); + mldq->mld_reserved = 0; + mldq->mld_mca = *grp; + mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + sizeof(*mldq), IPPROTO_ICMPV6, + csum_partial(mldq, + sizeof(*mldq), + 0)); + break; + case 2: + mld2q = (struct mld2_query *)icmp6_hdr(skb); + mld2q->mld2q_mrc = htons((u16)jiffies_to_msecs(interval)); + mld2q->mld2q_type = ICMPV6_MGM_QUERY; + mld2q->mld2q_code = 0; + mld2q->mld2q_cksum = 0; + mld2q->mld2q_resv1 = 0; + mld2q->mld2q_resv2 = 0; + mld2q->mld2q_suppress = 0; + mld2q->mld2q_qrv = 2; + mld2q->mld2q_nsrcs = 0; + mld2q->mld2q_qqic = br->multicast_query_interval / HZ; + mld2q->mld2q_mca = *grp; + mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + sizeof(*mld2q), + IPPROTO_ICMPV6, + csum_partial(mld2q, + sizeof(*mld2q), + 0)); + break; + } + skb_put(skb, mld_hdr_size); __skb_pull(skb, sizeof(*eth)); @@ -607,7 +663,8 @@ err: } struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, - struct net_bridge_port *port, struct br_ip *group) + struct net_bridge_port *p, + struct br_ip *group) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -623,7 +680,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, } hash = br_ip_hash(mdb, group); - mp = br_multicast_get_group(br, port, group, hash); + mp = br_multicast_get_group(br, p, group, hash); switch (PTR_ERR(mp)) { case 0: break; @@ -658,7 +715,8 @@ struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, struct net_bridge_port_group __rcu *next, - unsigned char flags) + unsigned char flags, + const unsigned char *src) { struct net_bridge_port_group *p; @@ -673,16 +731,36 @@ struct net_bridge_port_group *br_multicast_new_port_group( hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, (unsigned long)p); + + if (src) + memcpy(p->eth_addr, src, ETH_ALEN); + else + memset(p->eth_addr, 0xff, ETH_ALEN); + return p; } +static bool br_port_group_equal(struct net_bridge_port_group *p, + struct net_bridge_port *port, + const unsigned char *src) +{ + if (p->port != port) + return false; + + if (!(port->flags & BR_MULTICAST_TO_UNICAST)) + return true; + + return ether_addr_equal(src, p->eth_addr); +} + static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) + struct br_ip *group, + const unsigned char *src) { - struct net_bridge_mdb_entry *mp; - struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + struct net_bridge_mdb_entry *mp; unsigned long now = jiffies; int err; @@ -705,13 +783,13 @@ static int br_multicast_add_group(struct net_bridge *br, for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { - if (p->port == port) + if (br_port_group_equal(p, port, src)) goto found; if ((unsigned long)p->port < (unsigned long)port) break; } - p = br_multicast_new_port_group(port, group, *pp, 0); + p = br_multicast_new_port_group(port, group, *pp, 0, src); if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); @@ -730,7 +808,8 @@ err: static int br_ip4_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, __be32 group, - __u16 vid) + __u16 vid, + const unsigned char *src) { struct br_ip br_group; @@ -741,14 +820,15 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, br_group.proto = htons(ETH_P_IP); br_group.vid = vid; - return br_multicast_add_group(br, port, &br_group); + return br_multicast_add_group(br, port, &br_group, src); } #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, const struct in6_addr *group, - __u16 vid) + __u16 vid, + const unsigned char *src) { struct br_ip br_group; @@ -759,7 +839,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; - return br_multicast_add_group(br, port, &br_group); + return br_multicast_add_group(br, port, &br_group, src); } #endif @@ -771,16 +851,10 @@ static void br_multicast_router_expired(unsigned long data) spin_lock(&br->multicast_lock); if (port->multicast_router == MDB_RTR_TYPE_DISABLED || port->multicast_router == MDB_RTR_TYPE_PERM || - timer_pending(&port->multicast_router_timer) || - hlist_unhashed(&port->rlist)) + timer_pending(&port->multicast_router_timer)) goto out; - hlist_del_init_rcu(&port->rlist); - br_rtr_notify(br->dev, port, RTM_DELMDB); - /* Don't allow timer refresh if the router expired */ - if (port->multicast_router == MDB_RTR_TYPE_TEMP) - port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - + __del_port_router(port); out: spin_unlock(&br->multicast_lock); } @@ -860,9 +934,9 @@ static void br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, struct bridge_mcast_own_query *own_query) { - unsigned long time; - struct br_ip br_group; struct bridge_mcast_other_query *other_query = NULL; + struct br_ip br_group; + unsigned long time; if (!netif_running(br->dev) || br->multicast_disabled || !br->multicast_querier) @@ -929,6 +1003,18 @@ static void br_ip6_multicast_port_query_expired(unsigned long data) } #endif +static void br_mc_disabled_update(struct net_device *dev, bool value) +{ + struct switchdev_attr attr = { + .orig_dev = dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, + .flags = SWITCHDEV_F_DEFER, + .u.mc_disabled = value, + }; + + switchdev_port_attr_set(dev, &attr); +} + int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; @@ -941,6 +1027,8 @@ int br_multicast_add_port(struct net_bridge_port *port) setup_timer(&port->ip6_own_query.timer, br_ip6_multicast_port_query_expired, (unsigned long)port); #endif + br_mc_disabled_update(port->dev, port->br->multicast_disabled); + port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); if (!port->mcast_stats) return -ENOMEM; @@ -1008,13 +1096,8 @@ void br_multicast_disable_port(struct net_bridge_port *port) if (!(pg->flags & MDB_PG_FLAGS_PERMANENT)) br_multicast_del_pg(br, pg); - if (!hlist_unhashed(&port->rlist)) { - hlist_del_init_rcu(&port->rlist); - br_rtr_notify(br->dev, port, RTM_DELMDB); - /* Don't allow timer refresh if disabling */ - if (port->multicast_router == MDB_RTR_TYPE_TEMP) - port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - } + __del_port_router(port); + del_timer(&port->multicast_router_timer); del_timer(&port->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) @@ -1028,6 +1111,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + const unsigned char *src; struct igmpv3_report *ih; struct igmpv3_grec *grec; int i; @@ -1068,12 +1152,14 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, continue; } + src = eth_hdr(skb)->h_source; if ((type == IGMPV3_CHANGE_TO_INCLUDE || type == IGMPV3_MODE_IS_INCLUDE) && ntohs(grec->grec_nsrcs) == 0) { - br_ip4_multicast_leave_group(br, port, group, vid); + br_ip4_multicast_leave_group(br, port, group, vid, src); } else { - err = br_ip4_multicast_add_group(br, port, group, vid); + err = br_ip4_multicast_add_group(br, port, group, vid, + src); if (err) break; } @@ -1088,6 +1174,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + const unsigned char *src; struct icmp6hdr *icmp6h; struct mld2_grec *grec; int i; @@ -1135,14 +1222,16 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, continue; } + src = eth_hdr(skb)->h_source; if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || grec->grec_type == MLD2_MODE_IS_INCLUDE) && ntohs(*nsrcs) == 0) { br_ip6_multicast_leave_group(br, port, &grec->grec_mca, - vid); + vid, src); } else { err = br_ip6_multicast_add_group(br, port, - &grec->grec_mca, vid); + &grec->grec_mca, vid, + src); if (err) break; } @@ -1228,6 +1317,19 @@ br_multicast_update_query_timer(struct net_bridge *br, mod_timer(&query->timer, jiffies + br->multicast_querier_interval); } +static void br_port_mc_router_state_change(struct net_bridge_port *p, + bool is_mc_router) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_MROUTER, + .flags = SWITCHDEV_F_DEFER, + .u.mrouter = is_mc_router, + }; + + switchdev_port_attr_set(p->dev, &attr); +} + /* * Add port to router_list * list is maintained ordered by pointer value @@ -1253,6 +1355,7 @@ static void br_multicast_add_router(struct net_bridge *br, else hlist_add_head_rcu(&port->rlist, &br->router_list); br_rtr_notify(br->dev, port, RTM_NEWMDB); + br_port_mc_router_state_change(port, true); } static void br_multicast_mark_router(struct net_bridge *br, @@ -1458,7 +1561,8 @@ br_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group, struct bridge_mcast_other_query *other_query, - struct bridge_mcast_own_query *own_query) + struct bridge_mcast_own_query *own_query, + const unsigned char *src) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -1482,7 +1586,7 @@ br_multicast_leave_group(struct net_bridge *br, for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { - if (p->port != port) + if (!br_port_group_equal(p, port, src)) continue; rcu_assign_pointer(*pp, p->next); @@ -1513,7 +1617,7 @@ br_multicast_leave_group(struct net_bridge *br, for (p = mlock_dereference(mp->ports, br); p != NULL; p = mlock_dereference(p->next, br)) { - if (p->port != port) + if (!br_port_group_equal(p, port, src)) continue; if (!hlist_unhashed(&p->mglist) && @@ -1564,7 +1668,8 @@ out: static void br_ip4_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, __be32 group, - __u16 vid) + __u16 vid, + const unsigned char *src) { struct br_ip br_group; struct bridge_mcast_own_query *own_query; @@ -1579,14 +1684,15 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, br_group.vid = vid; br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query, - own_query); + own_query, src); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, const struct in6_addr *group, - __u16 vid) + __u16 vid, + const unsigned char *src) { struct br_ip br_group; struct bridge_mcast_own_query *own_query; @@ -1601,7 +1707,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, br_group.vid = vid; br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query, - own_query); + own_query, src); } #endif @@ -1638,20 +1744,40 @@ static void br_multicast_err_count(const struct net_bridge *br, u64_stats_update_end(&pstats->syncp); } +static void br_multicast_pim(struct net_bridge *br, + struct net_bridge_port *port, + const struct sk_buff *skb) +{ + unsigned int offset = skb_transport_offset(skb); + struct pimhdr *pimhdr, _pimhdr; + + pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr); + if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION || + pim_hdr_type(pimhdr) != PIM_TYPE_HELLO) + return; + + br_multicast_mark_router(br, port); +} + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, u16 vid) { struct sk_buff *skb_trimmed = NULL; + const unsigned char *src; struct igmphdr *ih; int err; err = ip_mc_check_igmp(skb, &skb_trimmed); if (err == -ENOMSG) { - if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) + if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) { BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { + if (ip_hdr(skb)->protocol == IPPROTO_PIM) + br_multicast_pim(br, port, skb); + } return 0; } else if (err < 0) { br_multicast_err_count(br, port, skb->protocol); @@ -1659,13 +1785,14 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, } ih = igmp_hdr(skb); + src = eth_hdr(skb)->h_source; BR_INPUT_SKB_CB(skb)->igmp = ih->type; switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip4_multicast_add_group(br, port, ih->group, vid); + err = br_ip4_multicast_add_group(br, port, ih->group, vid, src); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); @@ -1674,7 +1801,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_query(br, port, skb_trimmed, vid); break; case IGMP_HOST_LEAVE_MESSAGE: - br_ip4_multicast_leave_group(br, port, ih->group, vid); + br_ip4_multicast_leave_group(br, port, ih->group, vid, src); break; } @@ -1694,6 +1821,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, u16 vid) { struct sk_buff *skb_trimmed = NULL; + const unsigned char *src; struct mld_msg *mld; int err; @@ -1713,8 +1841,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, switch (mld->mld_type) { case ICMPV6_MGM_REPORT: + src = eth_hdr(skb)->h_source; BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid, + src); break; case ICMPV6_MLD2_REPORT: err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid); @@ -1723,7 +1853,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = br_ip6_multicast_query(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_REDUCTION: - br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); + src = eth_hdr(skb)->h_source; + br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src); break; } @@ -1811,7 +1942,9 @@ void br_multicast_init(struct net_bridge *br) br->ip4_other_query.delay_time = 0; br->ip4_querier.port = NULL; + br->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) + br->multicast_mld_version = 1; br->ip6_other_query.delay_time = 0; br->ip6_querier.port = NULL; #endif @@ -1898,8 +2031,6 @@ void br_multicast_dev_del(struct net_bridge *br) out: spin_unlock_bh(&br->multicast_lock); - - free_percpu(br->mcast_stats); } int br_multicast_set_router(struct net_bridge *br, unsigned long val) @@ -1930,6 +2061,11 @@ static void __del_port_router(struct net_bridge_port *p) return; hlist_del_init_rcu(&p->rlist); br_rtr_notify(p->br->dev, p, RTM_DELMDB); + br_port_mc_router_state_change(p, false); + + /* don't allow timer refresh */ + if (p->multicast_router == MDB_RTR_TYPE_TEMP) + p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; } int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) @@ -2007,6 +2143,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) if (br->multicast_disabled == !val) goto unlock; + br_mc_disabled_update(br->dev, !val); br->multicast_disabled = !val; if (br->multicast_disabled) goto unlock; @@ -2112,6 +2249,44 @@ unlock: return err; } +int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val) +{ + /* Currently we support only version 2 and 3 */ + switch (val) { + case 2: + case 3: + break; + default: + return -EINVAL; + } + + spin_lock_bh(&br->multicast_lock); + br->multicast_igmp_version = val; + spin_unlock_bh(&br->multicast_lock); + + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val) +{ + /* Currently we support version 1 and 2 */ + switch (val) { + case 1: + case 2: + break; + default: + return -EINVAL; + } + + spin_lock_bh(&br->multicast_lock); + br->multicast_mld_version = val; + spin_unlock_bh(&br->multicast_lock); + + return 0; +} +#endif + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses @@ -2354,6 +2529,11 @@ int br_multicast_init_stats(struct net_bridge *br) return 0; } +void br_multicast_uninit_stats(struct net_bridge *br) +{ + free_percpu(br->mcast_stats); +} + static void mcast_stats_add_dir(u64 *dst, u64 *src) { dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX]; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2fe9345c1407..1f1e62095464 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -40,13 +40,13 @@ #include <net/netfilter/br_netfilter.h> #include <net/netns/generic.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "br_private.h" #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif -static int brnf_net_id __read_mostly; +static unsigned int brnf_net_id __read_mostly; struct brnf_net { bool enabled; @@ -399,7 +399,7 @@ bridged_dnat: br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish); + br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv, } -/* PF_BRIDGE/LOCAL_IN ************************************************/ -/* The packet is locally destined, which requires a real - * dst_entry, so detach the fake one. On the way up, the - * packet would pass through PRE_ROUTING again (which already - * took place when the packet entered the bridge), but we - * register an IPv4 PRE_ROUTING 'sabotage' hook that will - * prevent this from happening. */ -static unsigned int br_nf_local_in(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - br_drop_fake_rtable(skb); - return NF_ACCEPT; -} - /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -561,8 +546,8 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb, - in, skb->dev, br_forward_finish, 1); + br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, + br_forward_finish); return 0; } @@ -721,18 +706,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; - unsigned int mtu_reserved; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + unsigned int mtu, mtu_reserved; mtu_reserved = nf_bridge_mtu_reduction(skb); + mtu = skb->dev->mtu; + + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) + mtu = nf_bridge->frag_max_size; - if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } - nf_bridge = nf_bridge_info_get(skb); - /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ @@ -845,8 +832,10 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) - return NF_STOP; + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) { + state->okfn(state->net, state->sk, skb); + return NF_STOLEN; + } return NF_ACCEPT; } @@ -906,12 +895,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .priority = NF_BR_PRI_BRNF, }, { - .hook = br_nf_local_in, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_LOCAL_IN, - .priority = NF_BR_PRI_BRNF, - }, - { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, @@ -1006,20 +989,20 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, struct nf_hook_state state; int ret; - elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); - - while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF)) - elem = rcu_dereference(elem->next); + for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); + elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF; + elem = rcu_dereference(elem->next)) + ; if (!elem) return okfn(net, sk, skb); /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1, - NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); + nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, + sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, elem); rcu_read_unlock(); if (ret == 1) ret = okfn(net, sk, skb); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 5989661c659f..96c072e71ea2 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -38,7 +38,7 @@ #include <net/route.h> #include <net/netfilter/br_netfilter.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "br_private.h" #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e99037c6f7b7..225ef7d53701 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -20,6 +20,7 @@ #include "br_private.h" #include "br_private_stp.h" +#include "br_private_tunnel.h" static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, u32 filter_mask) @@ -95,9 +96,10 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, u32 filter_mask) { struct net_bridge_vlan_group *vg = NULL; - struct net_bridge_port *p; + struct net_bridge_port *p = NULL; struct net_bridge *br; int num_vlan_infos; + size_t vinfo_sz = 0; rcu_read_lock(); if (br_port_exists(dev)) { @@ -110,8 +112,13 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, num_vlan_infos = br_get_num_vlan_infos(vg, filter_mask); rcu_read_unlock(); + if (p && (p->flags & BR_VLAN_TUNNEL)) + vinfo_sz += br_get_vlan_tunnel_info_size(vg); + /* Each VLAN is returned in bridge_vlan_info along with flags */ - return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); + vinfo_sz += num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); + + return vinfo_sz; } static inline size_t br_port_info_size(void) @@ -123,10 +130,12 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_MCAST_TO_UCAST */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ @@ -173,6 +182,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, !!(p->flags & BR_ROOT_BLOCK)) || nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_MCAST_TO_UCAST, + !!(p->flags & BR_MULTICAST_TO_UNICAST)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || @@ -191,7 +202,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u16(skb, IFLA_BRPORT_NO, p->port_no) || nla_put_u8(skb, IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, p->topology_change_ack) || - nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending)) + nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || + nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & + BR_VLAN_TUNNEL))) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -414,6 +427,9 @@ static int br_fill_ifinfo(struct sk_buff *skb, err = br_fill_ifvlaninfo_compressed(skb, vg); else err = br_fill_ifvlaninfo(skb, vg); + + if (port && (port->flags & BR_VLAN_TUNNEL)) + err = br_fill_vlan_tunnel_info(skb, vg); rcu_read_unlock(); if (err) goto nla_put_failure; @@ -514,60 +530,88 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, return err; } +static int br_process_vlan_info(struct net_bridge *br, + struct net_bridge_port *p, int cmd, + struct bridge_vlan_info *vinfo_curr, + struct bridge_vlan_info **vinfo_last) +{ + if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK) + return -EINVAL; + + if (vinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + /* check if we are already processing a range */ + if (*vinfo_last) + return -EINVAL; + *vinfo_last = vinfo_curr; + /* don't allow range of pvids */ + if ((*vinfo_last)->flags & BRIDGE_VLAN_INFO_PVID) + return -EINVAL; + return 0; + } + + if (*vinfo_last) { + struct bridge_vlan_info tmp_vinfo; + int v, err; + + if (!(vinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END)) + return -EINVAL; + + if (vinfo_curr->vid <= (*vinfo_last)->vid) + return -EINVAL; + + memcpy(&tmp_vinfo, *vinfo_last, + sizeof(struct bridge_vlan_info)); + for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) { + tmp_vinfo.vid = v; + err = br_vlan_info(br, p, cmd, &tmp_vinfo); + if (err) + break; + } + *vinfo_last = NULL; + + return 0; + } + + return br_vlan_info(br, p, cmd, vinfo_curr); +} + static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, int cmd) { - struct bridge_vlan_info *vinfo_start = NULL; - struct bridge_vlan_info *vinfo = NULL; + struct bridge_vlan_info *vinfo_curr = NULL; + struct bridge_vlan_info *vinfo_last = NULL; struct nlattr *attr; - int err = 0; - int rem; + struct vtunnel_info tinfo_last = {}; + struct vtunnel_info tinfo_curr = {}; + int err = 0, rem; nla_for_each_nested(attr, af_spec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) - continue; - if (nla_len(attr) != sizeof(struct bridge_vlan_info)) - return -EINVAL; - vinfo = nla_data(attr); - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; - if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (vinfo_start) - return -EINVAL; - vinfo_start = vinfo; - /* don't allow range of pvids */ - if (vinfo_start->flags & BRIDGE_VLAN_INFO_PVID) + err = 0; + switch (nla_type(attr)) { + case IFLA_BRIDGE_VLAN_TUNNEL_INFO: + if (!(p->flags & BR_VLAN_TUNNEL)) return -EINVAL; - continue; - } - - if (vinfo_start) { - struct bridge_vlan_info tmp_vinfo; - int v; - - if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) - return -EINVAL; - - if (vinfo->vid <= vinfo_start->vid) + err = br_parse_vlan_tunnel_info(attr, &tinfo_curr); + if (err) + return err; + err = br_process_vlan_tunnel_info(br, p, cmd, + &tinfo_curr, + &tinfo_last); + if (err) + return err; + break; + case IFLA_BRIDGE_VLAN_INFO: + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; - - memcpy(&tmp_vinfo, vinfo_start, - sizeof(struct bridge_vlan_info)); - - for (v = vinfo_start->vid; v <= vinfo->vid; v++) { - tmp_vinfo.vid = v; - err = br_vlan_info(br, p, cmd, &tmp_vinfo); - if (err) - break; - } - vinfo_start = NULL; - } else { - err = br_vlan_info(br, p, cmd, vinfo); - } - if (err) + vinfo_curr = nla_data(attr); + err = br_process_vlan_info(br, p, cmd, vinfo_curr, + &vinfo_last); + if (err) + return err; break; + } } return err; @@ -586,6 +630,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 }, + [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -626,8 +671,9 @@ static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], /* Process bridge protocol info on port */ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) { - int err; unsigned long old_flags = p->flags; + bool br_vlan_tunnel_old = false; + int err; br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); @@ -636,9 +682,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); + br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); + br_vlan_tunnel_old = (p->flags & BR_VLAN_TUNNEL) ? true : false; + br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); + if (br_vlan_tunnel_old && !(p->flags & BR_VLAN_TUNNEL)) + nbp_vlan_tunnel_info_flush(p); + if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); if (err) @@ -781,20 +833,6 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int br_dev_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) -{ - struct net_bridge *br = netdev_priv(dev); - - if (tb[IFLA_ADDRESS]) { - spin_lock_bh(&br->lock); - br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); - spin_unlock_bh(&br->lock); - } - - return register_netdevice(dev); -} - static int br_port_slave_changelink(struct net_device *brdev, struct net_device *dev, struct nlattr *tb[], @@ -858,6 +896,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 }, [IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -1069,6 +1109,26 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]); br->multicast_stats_enabled = !!mcast_stats; } + + if (data[IFLA_BR_MCAST_IGMP_VERSION]) { + __u8 igmp_version; + + igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]); + err = br_multicast_set_igmp_version(br, igmp_version); + if (err) + return err; + } + +#if IS_ENABLED(CONFIG_IPV6) + if (data[IFLA_BR_MCAST_MLD_VERSION]) { + __u8 mld_version; + + mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]); + err = br_multicast_set_mld_version(br, mld_version); + if (err) + return err; + } +#endif #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (data[IFLA_BR_NF_CALL_IPTABLES]) { @@ -1093,6 +1153,28 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + int err; + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + err = register_netdevice(dev); + if (err) + return err; + + err = br_changelink(dev, tb, data); + if (err) + unregister_netdevice(dev); + return err; +} + static size_t br_get_size(const struct net_device *brdev) { return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ @@ -1135,6 +1217,8 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */ nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */ nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_IGMP_VERSION */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_MLD_VERSION */ #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ @@ -1166,7 +1250,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; - clockval = br_timer_value(&br->gc_timer); + clockval = br_timer_value(&br->gc_work.timer); if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) return -EMSGSIZE; @@ -1210,9 +1294,15 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT, br->multicast_last_member_count) || nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT, - br->multicast_startup_query_count)) + br->multicast_startup_query_count) || + nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION, + br->multicast_igmp_version)) return -EMSGSIZE; - +#if IS_ENABLED(CONFIG_IPV6) + if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION, + br->multicast_mld_version)) + return -EMSGSIZE; +#endif clockval = jiffies_to_clock_t(br->multicast_last_member_interval); if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval, IFLA_BR_PAD)) diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c new file mode 100644 index 000000000000..c913491495ab --- /dev/null +++ b/net/bridge/br_netlink_tunnel.c @@ -0,0 +1,294 @@ +/* + * Bridge per vlan tunnel port dst_metadata netlink control interface + * + * Authors: + * Roopa Prabhu <roopa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/etherdevice.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <uapi/linux/if_bridge.h> +#include <net/dst_metadata.h> + +#include "br_private.h" +#include "br_private_tunnel.h" + +static size_t __get_vlan_tinfo_size(void) +{ + return nla_total_size(0) + /* nest IFLA_BRIDGE_VLAN_TUNNEL_INFO */ + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_VLAN_TUNNEL_ID */ + nla_total_size(sizeof(u16)) + /* IFLA_BRIDGE_VLAN_TUNNEL_VID */ + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_VLAN_TUNNEL_FLAGS */ +} + +static bool vlan_tunid_inrange(struct net_bridge_vlan *v_curr, + struct net_bridge_vlan *v_last) +{ + __be32 tunid_curr = tunnel_id_to_key32(v_curr->tinfo.tunnel_id); + __be32 tunid_last = tunnel_id_to_key32(v_last->tinfo.tunnel_id); + + return (be32_to_cpu(tunid_curr) - be32_to_cpu(tunid_last)) == 1; +} + +static int __get_num_vlan_tunnel_infos(struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *v, *vtbegin = NULL, *vtend = NULL; + int num_tinfos = 0; + + /* Count number of vlan infos */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v) || !v->tinfo.tunnel_id) + continue; + + if (!vtbegin) { + goto initvars; + } else if ((v->vid - vtend->vid) == 1 && + vlan_tunid_inrange(v, vtend)) { + vtend = v; + continue; + } else { + if ((vtend->vid - vtbegin->vid) > 0) + num_tinfos += 2; + else + num_tinfos += 1; + } +initvars: + vtbegin = v; + vtend = v; + } + + if (vtbegin && vtend) { + if ((vtend->vid - vtbegin->vid) > 0) + num_tinfos += 2; + else + num_tinfos += 1; + } + + return num_tinfos; +} + +int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg) +{ + int num_tinfos; + + if (!vg) + return 0; + + rcu_read_lock(); + num_tinfos = __get_num_vlan_tunnel_infos(vg); + rcu_read_unlock(); + + return num_tinfos * __get_vlan_tinfo_size(); +} + +static int br_fill_vlan_tinfo(struct sk_buff *skb, u16 vid, + __be64 tunnel_id, u16 flags) +{ + __be32 tid = tunnel_id_to_key32(tunnel_id); + struct nlattr *tmap; + + tmap = nla_nest_start(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO); + if (!tmap) + return -EMSGSIZE; + if (nla_put_u32(skb, IFLA_BRIDGE_VLAN_TUNNEL_ID, + be32_to_cpu(tid))) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_VID, + vid)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, + flags)) + goto nla_put_failure; + nla_nest_end(skb, tmap); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, tmap); + + return -EMSGSIZE; +} + +static int br_fill_vlan_tinfo_range(struct sk_buff *skb, + struct net_bridge_vlan *vtbegin, + struct net_bridge_vlan *vtend) +{ + int err; + + if (vtend && (vtend->vid - vtbegin->vid) > 0) { + /* add range to skb */ + err = br_fill_vlan_tinfo(skb, vtbegin->vid, + vtbegin->tinfo.tunnel_id, + BRIDGE_VLAN_INFO_RANGE_BEGIN); + if (err) + return err; + + err = br_fill_vlan_tinfo(skb, vtend->vid, + vtend->tinfo.tunnel_id, + BRIDGE_VLAN_INFO_RANGE_END); + if (err) + return err; + } else { + err = br_fill_vlan_tinfo(skb, vtbegin->vid, + vtbegin->tinfo.tunnel_id, + 0); + if (err) + return err; + } + + return 0; +} + +int br_fill_vlan_tunnel_info(struct sk_buff *skb, + struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *vtbegin = NULL; + struct net_bridge_vlan *vtend = NULL; + struct net_bridge_vlan *v; + int err; + + /* Count number of vlan infos */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v)) + continue; + + if (!v->tinfo.tunnel_dst) + continue; + + if (!vtbegin) { + goto initvars; + } else if ((v->vid - vtend->vid) == 1 && + vlan_tunid_inrange(v, vtend)) { + vtend = v; + continue; + } else { + err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend); + if (err) + return err; + } +initvars: + vtbegin = v; + vtend = v; + } + + if (vtbegin) { + err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend); + if (err) + return err; + } + + return 0; +} + +static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = { + [IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 }, +}; + +static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, + u16 vid, u32 tun_id) +{ + int err = 0; + + if (!p) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + err = nbp_vlan_tunnel_info_add(p, vid, tun_id); + break; + case RTM_DELLINK: + nbp_vlan_tunnel_info_delete(p, vid); + break; + } + + return err; +} + +int br_parse_vlan_tunnel_info(struct nlattr *attr, + struct vtunnel_info *tinfo) +{ + struct nlattr *tb[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1]; + u32 tun_id; + u16 vid, flags = 0; + int err; + + memset(tinfo, 0, sizeof(*tinfo)); + + err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, + attr, vlan_tunnel_policy); + if (err < 0) + return err; + + if (!tb[IFLA_BRIDGE_VLAN_TUNNEL_ID] || + !tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]) + return -EINVAL; + + tun_id = nla_get_u32(tb[IFLA_BRIDGE_VLAN_TUNNEL_ID]); + vid = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]); + if (vid >= VLAN_VID_MASK) + return -ERANGE; + + if (tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]) + flags = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]); + + tinfo->tunid = tun_id; + tinfo->vid = vid; + tinfo->flags = flags; + + return 0; +} + +int br_process_vlan_tunnel_info(struct net_bridge *br, + struct net_bridge_port *p, int cmd, + struct vtunnel_info *tinfo_curr, + struct vtunnel_info *tinfo_last) +{ + int err; + + if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) + return -EINVAL; + memcpy(tinfo_last, tinfo_curr, sizeof(struct vtunnel_info)); + } else if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END) { + int t, v; + + if (!(tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN)) + return -EINVAL; + if ((tinfo_curr->vid - tinfo_last->vid) != + (tinfo_curr->tunid - tinfo_last->tunid)) + return -EINVAL; + t = tinfo_last->tunid; + for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { + err = br_vlan_tunnel_info(p, cmd, v, t); + if (err) + return err; + t++; + } + memset(tinfo_last, 0, sizeof(struct vtunnel_info)); + memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); + } else { + if (tinfo_last->flags) + return -EINVAL; + err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, + tinfo_curr->tunid); + if (err) + return err; + memset(tinfo_last, 0, sizeof(struct vtunnel_info)); + memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); + } + + return 0; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1b63177e0ccd..0d177280aa84 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -91,6 +91,11 @@ struct br_vlan_stats { struct u64_stats_sync syncp; }; +struct br_tunnel_info { + __be64 tunnel_id; + struct metadata_dst *tunnel_dst; +}; + /** * struct net_bridge_vlan - per-vlan entry * @@ -113,6 +118,7 @@ struct br_vlan_stats { */ struct net_bridge_vlan { struct rhash_head vnode; + struct rhash_head tnode; u16 vid; u16 flags; struct br_vlan_stats __percpu *stats; @@ -124,6 +130,9 @@ struct net_bridge_vlan { atomic_t refcnt; struct net_bridge_vlan *brvlan; }; + + struct br_tunnel_info tinfo; + struct list_head vlist; struct rcu_head rcu; @@ -145,24 +154,27 @@ struct net_bridge_vlan { */ struct net_bridge_vlan_group { struct rhashtable vlan_hash; + struct rhashtable tunnel_hash; struct list_head vlan_list; u16 num_vlans; u16 pvid; }; -struct net_bridge_fdb_entry -{ +struct net_bridge_fdb_entry { struct hlist_node hlist; struct net_bridge_port *dst; - unsigned long updated; - unsigned long used; mac_addr addr; __u16 vlan_id; unsigned char is_local:1, is_static:1, added_by_user:1, added_by_external_learn:1; + + /* write-heavy members should not affect lookups */ + unsigned long updated ____cacheline_aligned_in_smp; + unsigned long used; + struct rcu_head rcu; }; @@ -177,6 +189,7 @@ struct net_bridge_port_group { struct timer_list timer; struct br_ip addr; unsigned char flags; + unsigned char eth_addr[ETH_ALEN]; }; struct net_bridge_mdb_entry @@ -201,12 +214,16 @@ struct net_bridge_mdb_htable u32 ver; }; -struct net_bridge_port -{ +struct net_bridge_port { struct net_bridge *br; struct net_device *dev; struct list_head list; + unsigned long flags; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_bridge_vlan_group __rcu *vlgrp; +#endif + /* STP */ u8 priority; u8 state; @@ -227,8 +244,6 @@ struct net_bridge_port struct kobject kobj; struct rcu_head rcu; - unsigned long flags; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_own_query ip4_own_query; #if IS_ENABLED(CONFIG_IPV6) @@ -248,9 +263,6 @@ struct net_bridge_port #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif -#ifdef CONFIG_BRIDGE_VLAN_FILTERING - struct net_bridge_vlan_group __rcu *vlgrp; -#endif #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif @@ -272,14 +284,21 @@ static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device * rtnl_dereference(dev->rx_handler_data) : NULL; } -struct net_bridge -{ +struct net_bridge { spinlock_t lock; + spinlock_t hash_lock; struct list_head port_list; struct net_device *dev; - struct pcpu_sw_netstats __percpu *stats; - spinlock_t hash_lock; + /* These fields are accessed on each packet */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_enabled; + u8 vlan_stats_enabled; + __be16 vlan_proto; + u16 default_pvid; + struct net_bridge_vlan_group __rcu *vlgrp; +#endif + struct hlist_head hash[BR_HASH_SIZE]; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) union { @@ -297,17 +316,20 @@ struct net_bridge bridge_id designated_root; bridge_id bridge_id; u32 root_path_cost; + unsigned char topology_change; + unsigned char topology_change_detected; + u16 root_port; unsigned long max_age; unsigned long hello_time; unsigned long forward_delay; - unsigned long bridge_max_age; unsigned long ageing_time; + unsigned long bridge_max_age; unsigned long bridge_hello_time; unsigned long bridge_forward_delay; + unsigned long bridge_ageing_time; u8 group_addr[ETH_ALEN]; bool group_addr_set; - u16 root_port; enum { BR_NO_STP, /* no spanning tree */ @@ -315,9 +337,6 @@ struct net_bridge BR_USER_STP, /* new RSTP in userspace */ } stp_enabled; - unsigned char topology_change; - unsigned char topology_change_detected; - #ifdef CONFIG_BRIDGE_IGMP_SNOOPING unsigned char multicast_router; @@ -333,6 +352,8 @@ struct net_bridge u32 multicast_last_member_count; u32 multicast_startup_query_count; + u8 multicast_igmp_version; + unsigned long multicast_last_member_interval; unsigned long multicast_membership_interval; unsigned long multicast_querier_interval; @@ -353,27 +374,20 @@ struct net_bridge struct bridge_mcast_other_query ip6_other_query; struct bridge_mcast_own_query ip6_own_query; struct bridge_mcast_querier ip6_querier; + u8 multicast_mld_version; #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif struct timer_list hello_timer; struct timer_list tcn_timer; struct timer_list topology_change_timer; - struct timer_list gc_timer; + struct delayed_work gc_work; struct kobject *ifobj; u32 auto_cnt; #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif - -#ifdef CONFIG_BRIDGE_VLAN_FILTERING - struct net_bridge_vlan_group __rcu *vlgrp; - u8 vlan_enabled; - u8 vlan_stats_enabled; - __be16 vlan_proto; - u16 default_pvid; -#endif }; struct br_input_skb_cb { @@ -490,11 +504,12 @@ void br_fdb_find_delete_local(struct net_bridge *br, const unsigned char *addr, u16 vid); void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); -void br_fdb_cleanup(unsigned long arg); +void br_fdb_cleanup(struct work_struct *work); void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, u16 vid, int do_all); -struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr, __u16 vid); +struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, + const unsigned char *addr, + __u16 vid); int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); @@ -582,6 +597,10 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); int br_multicast_toggle(struct net_bridge *br, unsigned long val); int br_multicast_set_querier(struct net_bridge *br, unsigned long val); int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val); +#if IS_ENABLED(CONFIG_IPV6) +int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val); +#endif struct net_bridge_mdb_entry * br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst); struct net_bridge_mdb_entry * @@ -591,7 +610,7 @@ void br_multicast_free_pg(struct rcu_head *head); struct net_bridge_port_group * br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, struct net_bridge_port_group __rcu *next, - unsigned char flags); + unsigned char flags, const unsigned char *src); void br_mdb_init(void); void br_mdb_uninit(void); void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, @@ -601,6 +620,7 @@ void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir); int br_multicast_init_stats(struct net_bridge *br); +void br_multicast_uninit_stats(struct net_bridge *br); void br_multicast_get_stats(const struct net_bridge *br, const struct net_bridge_port *p, struct br_mcast_stats *dest); @@ -741,6 +761,10 @@ static inline int br_multicast_init_stats(struct net_bridge *br) return 0; } +static inline void br_multicast_uninit_stats(struct net_bridge *br) +{ +} + static inline int br_multicast_igmp_type(const struct sk_buff *skb) { return 0; @@ -756,6 +780,7 @@ bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb); bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb); int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); @@ -855,6 +880,7 @@ static inline bool br_should_learn(struct net_bridge_port *p, } static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb) { @@ -992,6 +1018,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t); int br_set_forward_delay(struct net_bridge *br, unsigned long x); int br_set_hello_time(struct net_bridge *br, unsigned long x); int br_set_max_age(struct net_bridge *br, unsigned long x); +int __set_ageing_time(struct net_device *dev, unsigned long t); int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time); diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h index 2fe910c4e170..3f7543a29b76 100644 --- a/net/bridge/br_private_stp.h +++ b/net/bridge/br_private_stp.h @@ -61,6 +61,7 @@ void br_received_tcn_bpdu(struct net_bridge_port *p); void br_transmit_config(struct net_bridge_port *p); void br_transmit_tcn(struct net_bridge *br); void br_topology_change_detection(struct net_bridge *br); +void __br_set_topology_change(struct net_bridge *br, unsigned char val); /* br_stp_bpdu.c */ void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h new file mode 100644 index 000000000000..4a447a378ab3 --- /dev/null +++ b/net/bridge/br_private_tunnel.h @@ -0,0 +1,83 @@ +/* + * Bridge per vlan tunnels + * + * Authors: + * Roopa Prabhu <roopa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _BR_PRIVATE_TUNNEL_H +#define _BR_PRIVATE_TUNNEL_H + +struct vtunnel_info { + u32 tunid; + u16 vid; + u16 flags; +}; + +/* br_netlink_tunnel.c */ +int br_parse_vlan_tunnel_info(struct nlattr *attr, + struct vtunnel_info *tinfo); +int br_process_vlan_tunnel_info(struct net_bridge *br, + struct net_bridge_port *p, + int cmd, + struct vtunnel_info *tinfo_curr, + struct vtunnel_info *tinfo_last); +int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg); +int br_fill_vlan_tunnel_info(struct sk_buff *skb, + struct net_bridge_vlan_group *vg); + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +/* br_vlan_tunnel.c */ +int vlan_tunnel_init(struct net_bridge_vlan_group *vg); +void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg); +int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid); +int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id); +void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port); +void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan); +int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg); +int br_handle_egress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_vlan *vlan); +#else +static inline int vlan_tunnel_init(struct net_bridge_vlan_group *vg) +{ + return 0; +} + +static inline int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, + u16 vid) +{ + return 0; +} + +static inline int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, + u16 vid, u32 tun_id) +{ + return 0; +} + +static inline void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port) +{ +} + +static inline void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan) +{ +} + +static inline int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg) +{ + return 0; +} +#endif + +#endif diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 9258b8ef14ff..8f56c2d1f1a7 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -234,7 +234,7 @@ static void br_record_config_timeout_values(struct net_bridge *br, br->max_age = bpdu->max_age; br->hello_time = bpdu->hello_time; br->forward_delay = bpdu->forward_delay; - br->topology_change = bpdu->topology_change; + __br_set_topology_change(br, bpdu->topology_change); } /* called under bridge lock */ @@ -344,7 +344,7 @@ void br_topology_change_detection(struct net_bridge *br) isroot ? "propagating" : "sending tcn bpdu"); if (isroot) { - br->topology_change = 1; + __br_set_topology_change(br, 1); mod_timer(&br->topology_change_timer, jiffies + br->bridge_forward_delay + br->bridge_max_age); } else if (!br->topology_change_detected) { @@ -562,6 +562,24 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +/* called under bridge lock */ +int __set_ageing_time(struct net_device *dev, unsigned long t) +{ + struct switchdev_attr attr = { + .orig_dev = dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, + .u.ageing_time = jiffies_to_clock_t(t), + }; + int err; + + err = switchdev_port_attr_set(dev, &attr); + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + /* Set time interval that dynamic forwarding entries live * For pure software bridge, allow values outside the 802.1 * standard specification for special cases: @@ -572,25 +590,52 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) */ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time) { - struct switchdev_attr attr = { - .orig_dev = br->dev, - .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, - .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, - .u.ageing_time = ageing_time, - }; unsigned long t = clock_t_to_jiffies(ageing_time); int err; - err = switchdev_port_attr_set(br->dev, &attr); - if (err && err != -EOPNOTSUPP) + err = __set_ageing_time(br->dev, t); + if (err) return err; + spin_lock_bh(&br->lock); + br->bridge_ageing_time = t; br->ageing_time = t; - mod_timer(&br->gc_timer, jiffies); + spin_unlock_bh(&br->lock); + + mod_delayed_work(system_long_wq, &br->gc_work, 0); return 0; } +/* called under bridge lock */ +void __br_set_topology_change(struct net_bridge *br, unsigned char val) +{ + unsigned long t; + int err; + + if (br->stp_enabled == BR_KERNEL_STP && br->topology_change != val) { + /* On topology change, set the bridge ageing time to twice the + * forward delay. Otherwise, restore its default ageing time. + */ + + if (val) { + t = 2 * br->forward_delay; + br_debug(br, "decreasing ageing time to %lu\n", t); + } else { + t = br->bridge_ageing_time; + br_debug(br, "restoring ageing time to %lu\n", t); + } + + err = __set_ageing_time(br->dev, t); + if (err) + br_warn(br, "error offloading ageing time\n"); + else + br->ageing_time = t; + } + + br->topology_change = val; +} + void __br_set_forward_delay(struct net_bridge *br, unsigned long t) { br->bridge_forward_delay = t; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index d8ad73b38de2..08341d2aa9c9 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -36,12 +36,6 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) /* called under bridge lock */ void br_init_port(struct net_bridge_port *p) { - struct switchdev_attr attr = { - .orig_dev = p->dev, - .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, - .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, - .u.ageing_time = jiffies_to_clock_t(p->br->ageing_time), - }; int err; p->port_id = br_make_port_id(p->priority, p->port_no); @@ -50,9 +44,9 @@ void br_init_port(struct net_bridge_port *p) p->topology_change_ack = 0; p->config_pending = 0; - err = switchdev_port_attr_set(p->dev, &attr); - if (err && err != -EOPNOTSUPP) - netdev_err(p->dev, "failed to set HW ageing time\n"); + err = __set_ageing_time(p->dev, p->br->ageing_time); + if (err) + netdev_err(p->dev, "failed to offload ageing time\n"); } /* NO locks held */ @@ -63,7 +57,7 @@ void br_stp_enable_bridge(struct net_bridge *br) spin_lock_bh(&br->lock); if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, jiffies + br->hello_time); - mod_timer(&br->gc_timer, jiffies + HZ/10); + mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10); br_config_bpdu_generation(br); @@ -87,14 +81,14 @@ void br_stp_disable_bridge(struct net_bridge *br) } - br->topology_change = 0; + __br_set_topology_change(br, 0); br->topology_change_detected = 0; spin_unlock_bh(&br->lock); del_timer_sync(&br->hello_timer); del_timer_sync(&br->topology_change_timer); del_timer_sync(&br->tcn_timer); - del_timer_sync(&br->gc_timer); + cancel_delayed_work_sync(&br->gc_work); } /* called under bridge lock */ diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index da058b85aa22..c98b3e5c140a 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -125,7 +125,7 @@ static void br_topology_change_timer_expired(unsigned long arg) br_debug(br, "topo change timer expired\n"); spin_lock(&br->lock); br->topology_change_detected = 0; - br->topology_change = 0; + __br_set_topology_change(br, 0); spin_unlock(&br->lock); } @@ -153,8 +153,6 @@ void br_stp_timer_init(struct net_bridge *br) setup_timer(&br->topology_change_timer, br_topology_change_timer_expired, (unsigned long) br); - - setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br); } void br_stp_port_timer_init(struct net_bridge_port *p) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index f88c4df3f91e..0b5dd607444c 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -19,6 +19,7 @@ #include <linux/rtnetlink.h> #include <linux/spinlock.h> #include <linux/times.h> +#include <linux/sched/signal.h> #include "br_private.h" @@ -263,7 +264,7 @@ static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); - return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); + return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer)); } static DEVICE_ATTR_RO(gc_timer); @@ -440,6 +441,23 @@ static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR_RW(hash_max); +static ssize_t multicast_igmp_version_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + + return sprintf(buf, "%u\n", br->multicast_igmp_version); +} + +static ssize_t multicast_igmp_version_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_igmp_version); +} +static DEVICE_ATTR_RW(multicast_igmp_version); + static ssize_t multicast_last_member_count_show(struct device *d, struct device_attribute *attr, char *buf) @@ -642,6 +660,25 @@ static ssize_t multicast_stats_enabled_store(struct device *d, return store_bridge_parm(d, buf, len, set_stats_enabled); } static DEVICE_ATTR_RW(multicast_stats_enabled); + +#if IS_ENABLED(CONFIG_IPV6) +static ssize_t multicast_mld_version_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + + return sprintf(buf, "%u\n", br->multicast_mld_version); +} + +static ssize_t multicast_mld_version_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_mld_version); +} +static DEVICE_ATTR_RW(multicast_mld_version); +#endif #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static ssize_t nf_call_iptables_show( @@ -809,6 +846,10 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, &dev_attr_multicast_stats_enabled.attr, + &dev_attr_multicast_igmp_version.attr, +#if IS_ENABLED(CONFIG_IPV6) + &dev_attr_multicast_mld_version.attr, +#endif #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) &dev_attr_nf_call_iptables.attr, diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 8bd569695e76..79aee759aba5 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -17,6 +17,7 @@ #include <linux/if_bridge.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> +#include <linux/sched/signal.h> #include "br_private.h" @@ -188,6 +189,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, store_multicast_router); BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); +BRPORT_ATTR_FLAG(multicast_to_unicast, BR_MULTICAST_TO_UNICAST); #endif static const struct brport_attribute *brport_attrs[] = { @@ -214,6 +216,7 @@ static const struct brport_attribute *brport_attrs[] = { #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, + &brport_attr_multicast_to_unicast, #endif &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index b6de4f457161..b838213c408e 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -5,6 +5,7 @@ #include <net/switchdev.h> #include "br_private.h" +#include "br_private_tunnel.h" static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg, const void *ptr) @@ -310,6 +311,7 @@ static int __vlan_del(struct net_bridge_vlan *v) } if (masterv != v) { + vlan_tunnel_info_del(vg, v); rhashtable_remove_fast(&vg->vlan_hash, &v->vnode, br_vlan_rht_params); __vlan_del_list(v); @@ -325,6 +327,7 @@ static void __vlan_group_free(struct net_bridge_vlan_group *vg) { WARN_ON(!list_empty(&vg->vlan_list)); rhashtable_destroy(&vg->vlan_hash); + vlan_tunnel_deinit(vg); kfree(vg); } @@ -338,6 +341,7 @@ static void __vlan_flush(struct net_bridge_vlan_group *vg) } struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *p, struct net_bridge_vlan_group *vg, struct sk_buff *skb) { @@ -378,6 +382,12 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) skb->vlan_tci = 0; + + if (p && (p->flags & BR_VLAN_TUNNEL) && + br_handle_egress_vlan_tunnel(skb, v)) { + kfree_skb(skb); + return NULL; + } out: return skb; } @@ -613,6 +623,8 @@ int br_vlan_delete(struct net_bridge *br, u16 vid) br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid); br_fdb_delete_by_port(br, NULL, vid, 0); + vlan_tunnel_info_del(vg, v); + return __vlan_del(v); } @@ -918,6 +930,9 @@ int br_vlan_init(struct net_bridge *br) ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); if (ret) goto err_rhtbl; + ret = vlan_tunnel_init(vg); + if (ret) + goto err_tunnel_init; INIT_LIST_HEAD(&vg->vlan_list); br->vlan_proto = htons(ETH_P_8021Q); br->default_pvid = 1; @@ -932,6 +947,8 @@ out: return ret; err_vlan_add: + vlan_tunnel_deinit(vg); +err_tunnel_init: rhashtable_destroy(&vg->vlan_hash); err_rhtbl: kfree(vg); @@ -961,6 +978,9 @@ int nbp_vlan_init(struct net_bridge_port *p) ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); if (ret) goto err_rhtbl; + ret = vlan_tunnel_init(vg); + if (ret) + goto err_tunnel_init; INIT_LIST_HEAD(&vg->vlan_list); rcu_assign_pointer(p->vlgrp, vg); if (p->br->default_pvid) { @@ -976,9 +996,11 @@ out: err_vlan_add: RCU_INIT_POINTER(p->vlgrp, NULL); synchronize_rcu(); + vlan_tunnel_deinit(vg); +err_tunnel_init: rhashtable_destroy(&vg->vlan_hash); -err_vlan_enabled: err_rhtbl: +err_vlan_enabled: kfree(vg); goto out; diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c new file mode 100644 index 000000000000..6d2c4eed2dc8 --- /dev/null +++ b/net/bridge/br_vlan_tunnel.c @@ -0,0 +1,205 @@ +/* + * Bridge per vlan tunnel port dst_metadata handling code + * + * Authors: + * Roopa Prabhu <roopa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/switchdev.h> +#include <net/dst_metadata.h> + +#include "br_private.h" +#include "br_private_tunnel.h" + +static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct net_bridge_vlan *vle = ptr; + __be64 tunid = *(__be64 *)arg->key; + + return vle->tinfo.tunnel_id != tunid; +} + +static const struct rhashtable_params br_vlan_tunnel_rht_params = { + .head_offset = offsetof(struct net_bridge_vlan, tnode), + .key_offset = offsetof(struct net_bridge_vlan, tinfo.tunnel_id), + .key_len = sizeof(__be64), + .nelem_hint = 3, + .locks_mul = 1, + .obj_cmpfn = br_vlan_tunid_cmp, + .automatic_shrinking = true, +}; + +static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl, + u64 tunnel_id) +{ + return rhashtable_lookup_fast(tbl, &tunnel_id, + br_vlan_tunnel_rht_params); +} + +void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan) +{ + if (!vlan->tinfo.tunnel_dst) + return; + rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode, + br_vlan_tunnel_rht_params); + vlan->tinfo.tunnel_id = 0; + dst_release(&vlan->tinfo.tunnel_dst->dst); + vlan->tinfo.tunnel_dst = NULL; +} + +static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan, u32 tun_id) +{ + struct metadata_dst *metadata = NULL; + __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); + int err; + + if (vlan->tinfo.tunnel_dst) + return -EEXIST; + + metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, + key, 0); + if (!metadata) + return -EINVAL; + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE; + vlan->tinfo.tunnel_dst = metadata; + vlan->tinfo.tunnel_id = key; + + err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode, + br_vlan_tunnel_rht_params); + if (err) + goto out; + + return 0; +out: + dst_release(&vlan->tinfo.tunnel_dst->dst); + vlan->tinfo.tunnel_dst = NULL; + vlan->tinfo.tunnel_id = 0; + + return err; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + vlan = br_vlan_find(vg, vid); + if (!vlan) + return -EINVAL; + + return __vlan_tunnel_info_add(vg, vlan, tun_id); +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + v = br_vlan_find(vg, vid); + if (!v) + return -ENOENT; + + vlan_tunnel_info_del(vg, v); + + return 0; +} + +static void __vlan_tunnel_info_flush(struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *vlan, *tmp; + + list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) + vlan_tunnel_info_del(vg, vlan); +} + +void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port) +{ + struct net_bridge_vlan_group *vg; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + __vlan_tunnel_info_flush(vg); +} + +int vlan_tunnel_init(struct net_bridge_vlan_group *vg) +{ + return rhashtable_init(&vg->tunnel_hash, &br_vlan_tunnel_rht_params); +} + +void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg) +{ + rhashtable_destroy(&vg->tunnel_hash); +} + +int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg) +{ + struct ip_tunnel_info *tinfo = skb_tunnel_info(skb); + struct net_bridge_vlan *vlan; + + if (!vg || !tinfo) + return 0; + + /* if already tagged, ignore */ + if (skb_vlan_tagged(skb)) + return 0; + + /* lookup vid, given tunnel id */ + vlan = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id); + if (!vlan) + return 0; + + skb_dst_drop(skb); + + __vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid); + + return 0; +} + +int br_handle_egress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_vlan *vlan) +{ + int err; + + if (!vlan || !vlan->tinfo.tunnel_id) + return 0; + + if (unlikely(!skb_vlan_tag_present(skb))) + return 0; + + skb_dst_drop(skb); + err = skb_vlan_pop(skb); + if (err) + return err; + + skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst)); + + return 0; +} diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 9cebf47ac840..e7ef1a1ef3a6 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT config NF_LOG_BRIDGE tristate "Bridge packet logging" + select NF_LOG_COMMON endif # NF_TABLES_BRIDGE diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 9024283d2bca..279527f8b1fe 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -187,7 +187,7 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { - pr_info("wrong size: %d against expected %d, rounded to %Zd\n", + pr_info("wrong size: %d against expected %d, rounded to %zd\n", em->match_size, expected_length, EBT_ALIGN(expected_length)); return -EINVAL; diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 070cf134a22f..5929309beaa1 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -51,7 +51,8 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par) if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, + (struct net_device *)xt_in(par), *diptr, shp, info->mac, shp); return info->target; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 517e78befcb2..61a9f1be1263 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -105,6 +105,7 @@ static struct xt_match ebt_limit_mt_reg __read_mostly = { .match = ebt_limit_mt, .checkentry = ebt_limit_mt_check, .matchsize = sizeof(struct ebt_limit_info), + .usersize = offsetof(struct ebt_limit_info, prev), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct ebt_compat_limit_info), #endif diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 9a11086ba6ff..98b9c8e8615e 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,7 +78,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, unsigned int bitmask; /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net)) + if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) return; spin_lock_bh(&ebt_log_lock); @@ -179,7 +179,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = par->net; + struct net *net = xt_net(par); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; @@ -190,11 +190,12 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo */ if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, - par->in, par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, "%s", + info->prefix); else - ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 54816150608e..c1dc48686200 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -23,16 +23,16 @@ static unsigned int ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 2e7c4f974340..8d2a85e0594e 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -23,12 +23,12 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, 0)) return EBT_DROP; - if (par->hooknum != NF_BR_BROUTING) + if (xt_hooknum(par) != NF_BR_BROUTING) /* rcu_read_lock()ed by nf_hook_thresh */ ether_addr_copy(eth_hdr(skb)->h_dest, - br_port_get_rcu(par->in)->br->dev->dev_addr); + br_port_get_rcu(xt_in(par))->br->dev->dev_addr); else - ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr); + ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr); skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index ec94c6f1ae88..8fe36dc3aab2 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb) struct nf_hook_state state; int ret; - nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN, + nf_hook_state_init(&state, NF_BR_BROUTING, NFPROTO_BRIDGE, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f5c11bbe27db..79b69917f521 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -23,7 +23,7 @@ #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/smp.h> #include <linux/cpumask.h> #include <linux/audit.h> @@ -194,12 +194,8 @@ unsigned int ebt_do_table(struct sk_buff *skb, const struct ebt_table_info *private; struct xt_action_param acpar; - acpar.family = NFPROTO_BRIDGE; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; + acpar.state = state; acpar.hotdrop = false; - acpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; @@ -1350,56 +1346,72 @@ static int update_counters(struct net *net, const void __user *user, hlp.num_counters, user, len); } -static inline int ebt_make_matchname(const struct ebt_entry_match *m, - const char *base, char __user *ubase) +static inline int ebt_obj_to_user(char __user *um, const char *_name, + const char *data, int entrysize, + int usersize, int datasize) { - char __user *hlp = ubase + ((char *)m - base); - char name[EBT_FUNCTION_MAXNAMELEN] = {}; + char name[EBT_FUNCTION_MAXNAMELEN] = {0}; /* ebtables expects 32 bytes long names but xt_match names are 29 bytes * long. Copy 29 bytes and fill remaining bytes with zeroes. */ - strlcpy(name, m->u.match->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) + strlcpy(name, _name, sizeof(name)); + if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) || + put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) || + xt_data_to_user(um + entrysize, data, usersize, datasize)) return -EFAULT; + return 0; } -static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, - const char *base, char __user *ubase) +static inline int ebt_match_to_user(const struct ebt_entry_match *m, + const char *base, char __user *ubase) { - char __user *hlp = ubase + ((char *)w - base); - char name[EBT_FUNCTION_MAXNAMELEN] = {}; + return ebt_obj_to_user(ubase + ((char *)m - base), + m->u.match->name, m->data, sizeof(*m), + m->u.match->usersize, m->match_size); +} - strlcpy(name, w->u.watcher->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) - return -EFAULT; - return 0; +static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w, + const char *base, char __user *ubase) +{ + return ebt_obj_to_user(ubase + ((char *)w - base), + w->u.watcher->name, w->data, sizeof(*w), + w->u.watcher->usersize, w->watcher_size); } -static inline int ebt_make_names(struct ebt_entry *e, const char *base, - char __user *ubase) +static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base, + char __user *ubase) { int ret; char __user *hlp; const struct ebt_entry_target *t; - char name[EBT_FUNCTION_MAXNAMELEN] = {}; - if (e->bitmask == 0) + if (e->bitmask == 0) { + /* special case !EBT_ENTRY_OR_ENTRIES */ + if (copy_to_user(ubase + ((char *)e - base), e, + sizeof(struct ebt_entries))) + return -EFAULT; return 0; + } + + if (copy_to_user(ubase + ((char *)e - base), e, sizeof(*e))) + return -EFAULT; hlp = ubase + (((char *)e + e->target_offset) - base); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - ret = EBT_MATCH_ITERATE(e, ebt_make_matchname, base, ubase); + ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase); if (ret != 0) return ret; - ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); + ret = EBT_WATCHER_ITERATE(e, ebt_watcher_to_user, base, ubase); if (ret != 0) return ret; - strlcpy(name, t->u.target->name, sizeof(name)); - if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) - return -EFAULT; + ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t), + t->u.target->usersize, t->target_size); + if (ret != 0) + return ret; + return 0; } @@ -1479,13 +1491,9 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, if (ret) return ret; - if (copy_to_user(tmp.entries, entries, entries_size)) { - BUGPRINT("Couldn't copy entries to userspace\n"); - return -EFAULT; - } /* set the match/watcher/target names right */ return EBT_ENTRY_ITERATE(entries, entries_size, - ebt_make_names, entries, tmp.entries); + ebt_entry_to_user, entries, tmp.entries); } static int do_ebt_set_ctl(struct sock *sk, @@ -1634,8 +1642,10 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, if (match->compat_to_user) { if (match->compat_to_user(cm->data, m->data)) return -EFAULT; - } else if (copy_to_user(cm->data, m->data, msize)) + } else { + if (xt_data_to_user(cm->data, m->data, match->usersize, msize)) return -EFAULT; + } *size -= ebt_compat_entry_padsize() + off; *dstptr = cm->data; @@ -1661,8 +1671,10 @@ static int compat_target_to_user(struct ebt_entry_target *t, if (target->compat_to_user) { if (target->compat_to_user(cm->data, t->data)) return -EFAULT; - } else if (copy_to_user(cm->data, t->data, tsize)) - return -EFAULT; + } else { + if (xt_data_to_user(cm->data, t->data, target->usersize, tsize)) + return -EFAULT; + } *size -= ebt_compat_entry_padsize() + off; *dstptr = cm->data; diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 1663df598545..bd2b3c78f59b 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -24,21 +24,8 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_IPV6): - nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_ARP): - case htons(ETH_P_RARP): - nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - } + nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum, skb, + in, out, loginfo, prefix); } static struct nf_logger nf_bridge_logger __read_mostly = { diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index ad47a921b701..5974dbc1ea24 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -23,7 +23,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); - const struct net_device *in = pkt->in, *out = pkt->out; + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); u32 *dest = ®s->data[priv->dreg]; const struct net_bridge_port *p; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 4b3df6b0e3b9..206dc266ecd2 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -315,17 +315,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb, - pkt->in, pkt->hook); + nft_reject_br_send_v4_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), nft_reject_icmp_code(priv->icmp_code)); break; } @@ -333,17 +336,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb, - pkt->in, pkt->hook); + nft_reject_br_send_v6_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), nft_reject_icmpv6_code(priv->icmp_code)); break; } |