From b3343a2a2c95b3b7ed4f6596e860c4276ba46217 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 18 Sep 2012 00:01:12 +0000 Subject: net, ixgbe: handle link local multicast addresses in SR-IOV mode In SR-IOV mode the PF driver acts as the uplink port and is used to send control packets e.g. lldpad, stp, etc. eth0.1 eth0.2 eth0 VF VF PF | | | <-- stand-in for uplink | | | -------------------------- | Embedded Switch | -------------------------- | MAC <-- uplink But the embedded switch is setup to forward multicast addresses to all interfaces both VFs and PF and onto the physical link. This results in reserved MAC addresses used by control protocols to be forwarded over the switch onto the VF. In the LLDP case the PF sends an LLDPDU and it is currently being forwarded to all the VFs who then see the PF as a peer. This is incorrect. This patch adds the multicast addresses to the RAR table in the hardware to prevent this behavior. Signed-off-by: John Fastabend Tested-by: Phil Schmitt Tested-by: Sibai Li Signed-off-by: Jeff Kirsher --- net/bridge/br_private.h | 1 - 1 file changed, 1 deletion(-) (limited to 'net/bridge/br_private.h') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9b278c4ebee1..65b191a05dd6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -288,7 +288,6 @@ struct br_input_skb_cb { pr_debug("%s: " format, (br)->dev->name, ##args) extern struct notifier_block br_device_notifier; -extern const u8 br_group_address[ETH_ALEN]; /* called under bridge lock */ static inline int br_is_root_bridge(const struct net_bridge *br) -- cgit v1.2.3 From e5a55a898720096f43bc24938f8875c0a1b34cd7 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Oct 2012 08:12:57 +0000 Subject: net: create generic bridge ops The PF_BRIDGE:RTM_{GET|SET}LINK nlmsg family and type are currently embedded in the ./net/bridge module. This prohibits them from being used by other bridging devices. One example of this being hardware that has embedded bridging components. In order to use these nlmsg types more generically this patch adds two net_device_ops hooks. One to set link bridge attributes and another to dump the current bride attributes. ndo_bridge_setlink() ndo_bridge_getlink() CC: Lennert Buytenhek CC: Stephen Hemminger Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++ net/bridge/br_device.c | 2 ++ net/bridge/br_netlink.c | 73 ++++++++---------------------------------- net/bridge/br_private.h | 3 ++ net/core/rtnetlink.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 60 deletions(-) (limited to 'net/bridge/br_private.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f8eda0276f03..7bf867c97043 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -887,6 +887,10 @@ struct netdev_fcoe_hbainfo { * struct net_device *dev, int idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. + * + * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) + * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, + * struct net_device *dev) */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -998,6 +1002,12 @@ struct net_device_ops { struct netlink_callback *cb, struct net_device *dev, int idx); + + int (*ndo_bridge_setlink)(struct net_device *dev, + struct nlmsghdr *nlh); + int (*ndo_bridge_getlink)(struct sk_buff *skb, + u32 pid, u32 seq, + struct net_device *dev); }; /* diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 070e8a68cfc6..63b5b088e80f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -313,6 +313,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, + .ndo_bridge_getlink = br_getlink, + .ndo_bridge_setlink = br_setlink, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 093f527276a3..743511bb7319 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -111,54 +111,33 @@ errout: /* * Dump information about all ports, in response to GETLINK */ -static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev) { - struct net *net = sock_net(skb->sk); - struct net_device *dev; - int idx; - - idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - struct net_bridge_port *port = br_port_get_rcu(dev); - - /* not a bridge port */ - if (!port || idx < cb->args[0]) - goto skip; - - if (br_fill_ifinfo(skb, port, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWLINK, - NLM_F_MULTI) < 0) - break; -skip: - ++idx; - } - rcu_read_unlock(); - cb->args[0] = idx; + int err = 0; + struct net_bridge_port *port = br_port_get_rcu(dev); + + /* not a bridge port */ + if (!port) + goto out; - return skb->len; + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); +out: + return err; } /* * Change state of port (ie from forwarding to blocking etc) * Used by spanning tree in user space. */ -static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { - struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct nlattr *protinfo; - struct net_device *dev; struct net_bridge_port *p; u8 new_state; - if (nlmsg_len(nlh) < sizeof(*ifm)) - return -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_family != AF_BRIDGE) - return -EPFNOSUPPORT; protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); if (!protinfo || nla_len(protinfo) < sizeof(u8)) @@ -168,10 +147,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(net, ifm->ifi_index); - if (!dev) - return -ENODEV; - p = br_port_get_rtnl(dev); if (!p) return -EINVAL; @@ -218,29 +193,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = { int __init br_netlink_init(void) { - int err; - - err = rtnl_link_register(&br_link_ops); - if (err < 0) - goto err1; - - err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, - br_dump_ifinfo, NULL); - if (err) - goto err2; - err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, - br_rtm_setlink, NULL, NULL); - if (err) - goto err3; - - return 0; - -err3: - rtnl_unregister_all(PF_BRIDGE); -err2: - rtnl_link_unregister(&br_link_ops); -err1: - return err; + return rtnl_link_register(&br_link_ops); } void __exit br_netlink_fini(void) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9b278c4ebee1..fdcd5f626ca6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -553,6 +553,9 @@ extern struct rtnl_link_ops br_link_ops; extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); +extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 64fe3cca2a4e..a068666b322f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2252,6 +2252,83 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx = 0; + u32 portid = NETLINK_CB(cb->skb).portid; + u32 seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *master = dev->master; + + if (idx < cb->args[0]) + continue; + + if (master && master->netdev_ops->ndo_bridge_getlink) { + const struct net_device_ops *bops = master->netdev_ops; + int err = bops->ndo_bridge_getlink(skb, portid, + seq, dev); + + if (err < 0) + break; + else + idx++; + } + + if (ops->ndo_bridge_getlink) { + int err = ops->ndo_bridge_getlink(skb, portid, + seq, dev); + + if (err < 0) + break; + else + idx++; + } + } + rcu_read_unlock(); + cb->args[0] = idx; + + return skb->len; +} + +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + int err = -EINVAL; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + if (dev->master && dev->master->netdev_ops->ndo_bridge_setlink) { + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); + if (err) + goto out; + } + + if (dev->netdev_ops->ndo_bridge_setlink) + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2433,5 +2510,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); + + rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } -- cgit v1.2.3 From 2469ffd723f76ac2d3ce3d4f31ee31ee0a06cd38 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Oct 2012 08:13:03 +0000 Subject: net: set and query VEB/VEPA bridge mode via PF_BRIDGE Hardware switches may support enabling and disabling the loopback switch which puts the device in a VEPA mode defined in the IEEE 802.1Qbg specification. In this mode frames are not switched in the hardware but sent directly to the switch. SR-IOV capable NICs will likely support this mode I am aware of at least two such devices. Also I am told (but don't have any of this hardware available) that there are devices that only support VEPA modes. In these cases it is important at a minimum to be able to query these attributes. This patch adds an additional IFLA_BRIDGE_MODE attribute that can be set and dumped via the PF_BRIDGE:{SET|GET}LINK operations. Also anticipating bridge attributes that may be common for both embedded bridges and software bridges this adds a flags attribute IFLA_BRIDGE_FLAGS currently used to determine if the command or event is being generated to/from an embedded bridge or software bridge. Finally, the event generation is pulled out of the bridge module and into rtnetlink proper. For example using the macvlan driver in VEPA mode on top of an embedded switch requires putting the embedded switch into a VEPA mode to get the expected results. -------- -------- | VEPA | | VEPA | <-- macvlan vepa edge relays -------- -------- | | | | ------------------ | VEPA | <-- embedded switch in NIC ------------------ | | ------------------- | external switch | <-- shiny new physical ------------------- switch with VEPA support A packet sent from the macvlan VEPA at the top could be loopbacked on the embedded switch and never seen by the external switch. So in order for this to work the embedded switch needs to be set in the VEPA state via the above described commands. By making these attributes nested in IFLA_AF_SPEC we allow future extensions to be made as needed. CC: Lennert Buytenhek CC: Stephen Hemminger Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 18 +++++++++ net/bridge/br_netlink.c | 2 - net/bridge/br_private.h | 4 +- net/core/rtnetlink.c | 85 ++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 102 insertions(+), 7 deletions(-) (limited to 'net/bridge/br_private.h') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index a8fe9549ddbc..b3885791e11e 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -97,5 +97,23 @@ struct __fdb_entry { __u16 unused; }; +/* Bridge Flags */ +#define BRIDGE_FLAGS_MASTER 1 /* Bridge command to/from master */ +#define BRIDGE_FLAGS_SELF 2 /* Bridge command to/from lowerdev */ +#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ +#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ + +/* Bridge management nested attributes + * [IFLA_AF_SPEC] = { + * [IFLA_BRIDGE_FLAGS] + * [IFLA_BRIDGE_MODE] + * } + */ +enum { + IFLA_BRIDGE_FLAGS, + IFLA_BRIDGE_MODE, + __IFLA_BRIDGE_MAX, +}; +#define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 743511bb7319..14b065cbd214 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -166,8 +166,6 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) br_port_state_selection(p->br); spin_unlock_bh(&p->br->lock); - br_ifinfo_notify(RTM_NEWLINK, p); - return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fdcd5f626ca6..6f40c14a2a65 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -158,7 +158,9 @@ struct net_bridge_port static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data); + struct net_bridge_port *port = + rcu_dereference_rtnl(dev->rx_handler_data); + return br_port_exists(dev) ? port : NULL; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a068666b322f..8d2af0f77d36 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2295,13 +2295,60 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static inline size_t bridge_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ +} + +static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +{ + struct net *net = dev_net(dev); + struct net_device *master = dev->master; + struct sk_buff *skb; + int err = -EOPNOTSUPP; + + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); + if (!skb) { + err = -ENOMEM; + goto errout; + } + + if (!flags && master && master->netdev_ops->ndo_bridge_getlink) + err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + else if (dev->netdev_ops->ndo_bridge_getlink) + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + + if (err < 0) + goto errout; + + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return 0; +errout: + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); + return err; +} + static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - int err = -EINVAL; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 flags = 0; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -2316,15 +2363,45 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (dev->master && dev->master->netdev_ops->ndo_bridge_setlink) { + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + flags = nla_get_u16(attr); + break; + } + } + } + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + if (!dev->master || + !dev->master->netdev_ops->ndo_bridge_setlink) { + err = -EOPNOTSUPP; + goto out; + } + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); if (err) goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; } - if (dev->netdev_ops->ndo_bridge_setlink) - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_setlink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + if (attr && nla_type(attr) == IFLA_BRIDGE_FLAGS) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, flags); out: return err; } -- cgit v1.2.3 From 0cb2bbbea0857c5c76db4cae85343553a882c0de Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Sat, 3 Nov 2012 23:02:30 +0100 Subject: bridge: Avoid 'statement with no effect' compiler warnings Instead of issuing (0) statements when !CONFIG_SYSFS which will cause 'warning: ', we'll use inline statements instead. This will effectively do the same thing, but suppress any unnecessary warnings. Cc: Stephen Hemminger Cc: bridge@lists.linux-foundation.org Cc: netdev@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: David S. Miller --- net/bridge/br_private.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/bridge/br_private.h') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d5efa57a2c20..22111ffd68df 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -570,10 +570,10 @@ extern void br_sysfs_delbr(struct net_device *dev); #else -#define br_sysfs_addif(p) (0) -#define br_sysfs_renameif(p) (0) -#define br_sysfs_addbr(dev) (0) -#define br_sysfs_delbr(dev) do { } while(0) +static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } +static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ #endif -- cgit v1.2.3 From a2e01a65cd7135dab26d27d4b589b2e5358bec99 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 13 Nov 2012 07:53:07 +0000 Subject: bridge: implement BPDU blocking This is Linux bridge implementation of STP protection (Cisco BPDU guard/Juniper BPDU block). BPDU block disables the bridge port if a STP BPDU packet is received. Why would you want to do this? If running Spanning Tree on bridge, hostile devices on the network may send BPDU and cause network failure. Enabling bpdu block will detect and stop this. How to recover the port? The port will be restarted if link is brought down, or removed and reattached. For example: # ip li set dev eth0 down; ip li set dev eth0 up Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 1 + net/bridge/br_stp_bpdu.c | 7 +++++++ net/bridge/br_sysfs_if.c | 2 ++ 5 files changed, 16 insertions(+), 1 deletion(-) (limited to 'net/bridge/br_private.h') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 96f7cf49367b..5e871e4d923f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -216,6 +216,7 @@ enum { IFLA_BRPORT_PRIORITY, /* " priority */ IFLA_BRPORT_COST, /* " cost */ IFLA_BRPORT_MODE, /* mode (hairpin) */ + IFLA_BRPORT_GUARD, /* bpdu guard */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0188a2f706c4..c331e28c7880 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -26,6 +26,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(2) /* IFLA_BRPORT_PRIORITY */ + nla_total_size(4) /* IFLA_BRPORT_COST */ + nla_total_size(1) /* IFLA_BRPORT_MODE */ + + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + 0; } @@ -49,7 +50,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || - nla_put_u8(skb, IFLA_BRPORT_MODE, mode)) + nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || + nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD))) return -EMSGSIZE; return 0; @@ -162,6 +164,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_COST] = { .type = NLA_U32 }, [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, + [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -203,6 +206,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) int err; br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 22111ffd68df..c92b0804ff2d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -135,6 +135,7 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 +#define BR_BPDU_GUARD 0x00000002 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index fd30a6022dea..7f884e3fb955 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -170,6 +170,13 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (!ether_addr_equal(dest, br->group_addr)) goto out; + if (p->flags & BR_BPDU_GUARD) { + br_notice(br, "BPDU received on blocked port %u(%s)\n", + (unsigned int) p->port_no, p->dev->name); + br_stp_disable_port(p); + goto out; + } + buf = skb_pull(skb, 3); if (buf[0] == BPDU_TYPE_CONFIG) { diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index f26173d33d8d..d1dfa4026185 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -156,6 +156,7 @@ static int store_flush(struct net_bridge_port *p, unsigned long v) static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); +BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -189,6 +190,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_hold_timer, &brport_attr_flush, &brport_attr_hairpin_mode, + &brport_attr_bpdu_guard, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, #endif -- cgit v1.2.3 From 1007dd1aa50b0403df370834f647abef1722925c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 13 Nov 2012 07:53:08 +0000 Subject: bridge: add root port blocking This is Linux bridge implementation of root port guard. If BPDU is received from a leaf (edge) port, it should not be elected as root port. Why would you want to do this? If using STP on a bridge and the downstream bridges are not fully trusted; this prevents a hostile guest for rerouting traffic. Why not just use netfilter? Netfilter does not track of follow spanning tree decisions. It would be difficult and error prone to try and mirror STP resolution in netfilter module. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 5 ++++- net/bridge/br_private.h | 1 + net/bridge/br_stp.c | 22 +++++++++++++++++++++- net/bridge/br_sysfs_if.c | 2 ++ 5 files changed, 29 insertions(+), 2 deletions(-) (limited to 'net/bridge/br_private.h') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5e871e4d923f..7aae0179ae44 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -217,6 +217,7 @@ enum { IFLA_BRPORT_COST, /* " cost */ IFLA_BRPORT_MODE, /* mode (hairpin) */ IFLA_BRPORT_GUARD, /* bpdu guard */ + IFLA_BRPORT_PROTECT, /* root port protection */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c331e28c7880..65429b99a2a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -27,6 +27,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(4) /* IFLA_BRPORT_COST */ + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + 0; } @@ -51,7 +52,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || - nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD))) + nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || + nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK))) return -EMSGSIZE; return 0; @@ -165,6 +167,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c92b0804ff2d..eb9cd42146a5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -136,6 +136,7 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 #define BR_BPDU_GUARD 0x00000002 +#define BR_ROOT_BLOCK 0x00000004 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index af9a12099ba4..b01849a74310 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -100,6 +100,21 @@ static int br_should_become_root_port(const struct net_bridge_port *p, return 0; } +static void br_root_port_block(const struct net_bridge *br, + struct net_bridge_port *p) +{ + + br_notice(br, "port %u(%s) tried to become root port (blocked)", + (unsigned int) p->port_no, p->dev->name); + + p->state = BR_STATE_LISTENING; + br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + + if (br->forward_delay > 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); +} + /* called under bridge lock */ static void br_root_selection(struct net_bridge *br) { @@ -107,7 +122,12 @@ static void br_root_selection(struct net_bridge *br) u16 root_port = 0; list_for_each_entry(p, &br->port_list, list) { - if (br_should_become_root_port(p, root_port)) + if (!br_should_become_root_port(p, root_port)) + continue; + + if (p->flags & BR_ROOT_BLOCK) + br_root_port_block(br, p); + else root_port = p->port_no; } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index d1dfa4026185..80a4fc5d96ab 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -157,6 +157,7 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); +BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -191,6 +192,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_flush, &brport_attr_hairpin_mode, &brport_attr_bpdu_guard, + &brport_attr_root_block, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, #endif -- cgit v1.2.3 From 50426b5925ff0d7f47c20e6886047f1bb6245901 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 3 Dec 2012 23:56:40 +0000 Subject: bridge: implement multicast fast leave V2: make the toggle per-port Fast leave allows bridge to immediately stops the multicast traffic on the port receives IGMP Leave when IGMP snooping is enabled, no timeouts are observed. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 21 +++++++++++++++++++++ net/bridge/br_private.h | 1 + net/bridge/br_sysfs_if.c | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'net/bridge/br_private.h') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 241743417f49..2391bae4f733 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1225,6 +1225,27 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (port && port->multicast_fast_leave) { + struct net_bridge_port_group __rcu **pp; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port != port) + continue; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + } + goto out; + } + now = jiffies; time = now + br->multicast_last_member_count * br->multicast_last_member_interval; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index eb9cd42146a5..cdbf9047a659 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -141,6 +141,7 @@ struct net_bridge_port #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; unsigned char multicast_router; + unsigned char multicast_fast_leave; struct timer_list multicast_router_timer; struct timer_list multicast_query_timer; struct hlist_head mglist; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 7ff95ba21982..dc484ace0be3 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -172,6 +172,25 @@ static int store_multicast_router(struct net_bridge_port *p, } static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, store_multicast_router); + +static ssize_t show_multicast_fast_leave(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%d\n", p->multicast_fast_leave); +} + +static int store_multicast_fast_leave(struct net_bridge_port *p, + unsigned long v) +{ + if (p->br->multicast_disabled) + return -EINVAL; + + p->multicast_fast_leave = !!v; + return 0; +} + +static BRPORT_ATTR(multicast_fast_leave, S_IRUGO | S_IWUSR, + show_multicast_fast_leave, store_multicast_fast_leave); #endif static const struct brport_attribute *brport_attrs[] = { @@ -195,6 +214,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_root_block, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, + &brport_attr_multicast_fast_leave, #endif NULL }; -- cgit v1.2.3 From c2d3babfafbb9f6629cfb47139758e59a5eb0d80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Dec 2012 16:24:45 -0500 Subject: bridge: implement multicast fast leave V3: make it a flag V2: make the toggle per-port Fast leave allows bridge to immediately stops the multicast traffic on the port receives IGMP Leave when IGMP snooping is enabled, no timeouts are observed. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang --- include/uapi/linux/if_link.h | 1 + net/bridge/br_multicast.c | 2 +- net/bridge/br_netlink.c | 4 +++- net/bridge/br_private.h | 2 +- net/bridge/br_sysfs_if.c | 19 +------------------ 5 files changed, 7 insertions(+), 21 deletions(-) (limited to 'net/bridge/br_private.h') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bb58aeb7f34d..60f3b6b90602 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -218,6 +218,7 @@ enum { IFLA_BRPORT_MODE, /* mode (hairpin) */ IFLA_BRPORT_GUARD, /* bpdu guard */ IFLA_BRPORT_PROTECT, /* root port protection */ + IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2391bae4f733..a2a7a1a79081 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1225,7 +1225,7 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; - if (port && port->multicast_fast_leave) { + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; for (pp = &mp->ports; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 65429b99a2a3..850b7d1f3a41 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -53,7 +53,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || - nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK))) + nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) return -EMSGSIZE; return 0; @@ -210,6 +211,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); + br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cdbf9047a659..cd86222cf5e3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -137,11 +137,11 @@ struct net_bridge_port #define BR_HAIRPIN_MODE 0x00000001 #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 +#define BR_MULTICAST_FAST_LEAVE 0x00000008 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; unsigned char multicast_router; - unsigned char multicast_fast_leave; struct timer_list multicast_router_timer; struct timer_list multicast_query_timer; struct hlist_head mglist; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index dc484ace0be3..a1ef1b6e14dc 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -173,24 +173,7 @@ static int store_multicast_router(struct net_bridge_port *p, static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, store_multicast_router); -static ssize_t show_multicast_fast_leave(struct net_bridge_port *p, - char *buf) -{ - return sprintf(buf, "%d\n", p->multicast_fast_leave); -} - -static int store_multicast_fast_leave(struct net_bridge_port *p, - unsigned long v) -{ - if (p->br->multicast_disabled) - return -EINVAL; - - p->multicast_fast_leave = !!v; - return 0; -} - -static BRPORT_ATTR(multicast_fast_leave, S_IRUGO | S_IWUSR, - show_multicast_fast_leave, store_multicast_fast_leave); +BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); #endif static const struct brport_attribute *brport_attrs[] = { -- cgit v1.2.3 From ee07c6e7a6f8a25c18f0a6b18152fbd7499245f6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 7 Dec 2012 00:04:48 +0000 Subject: bridge: export multicast database via netlink V5: fix two bugs pointed out by Thomas remove seq check for now, mark it as TODO V4: remove some useless #include some coding style fix V3: drop debugging printk's update selinux perm table as well V2: drop patch 1/2, export ifindex directly Redesign netlink attributes Improve netlink seq check Handle IPv6 addr as well This patch exports bridge multicast database via netlink message type RTM_GETMDB. Similar to fdb, but currently bridge-specific. We may need to support modify multicast database too (RTM_{ADD,DEL}MDB). (Thanks to Thomas for patient reviews) Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Cc: Jesper Dangaard Brouer Signed-off-by: Cong Wang Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 55 ++++++++++++++ include/uapi/linux/rtnetlink.h | 3 + net/bridge/Makefile | 2 +- net/bridge/br_mdb.c | 163 +++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 1 + net/bridge/br_private.h | 1 + security/selinux/nlmsgtab.c | 1 + 7 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 net/bridge/br_mdb.c (limited to 'net/bridge/br_private.h') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index b3885791e11e..9a0f6ff0d7e7 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -116,4 +116,59 @@ enum { __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) + +/* Bridge multicast database attributes + * [MDBA_MDB] = { + * [MDBA_MDB_ENTRY] = { + * [MDBA_MDB_ENTRY_INFO] + * } + * } + * [MDBA_ROUTER] = { + * [MDBA_ROUTER_PORT] + * } + */ +enum { + MDBA_UNSPEC, + MDBA_MDB, + MDBA_ROUTER, + __MDBA_MAX, +}; +#define MDBA_MAX (__MDBA_MAX - 1) + +enum { + MDBA_MDB_UNSPEC, + MDBA_MDB_ENTRY, + __MDBA_MDB_MAX, +}; +#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1) + +enum { + MDBA_MDB_ENTRY_UNSPEC, + MDBA_MDB_ENTRY_INFO, + __MDBA_MDB_ENTRY_MAX, +}; +#define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1) + +enum { + MDBA_ROUTER_UNSPEC, + MDBA_ROUTER_PORT, + __MDBA_ROUTER_MAX, +}; +#define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) + +struct br_port_msg { + __u32 ifindex; +}; + +struct br_mdb_entry { + __u32 ifindex; + struct { + union { + __be32 ip4; + struct in6_addr ip6; + } u; + __be16 proto; + } addr; +}; + #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 33d29cea37ea..354a1e7d32a3 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -125,6 +125,9 @@ enum { RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/net/bridge/Makefile b/net/bridge/Makefile index d0359ea8ee79..e859098f5ee9 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -12,6 +12,6 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o -bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c new file mode 100644 index 000000000000..edc0d731f6b2 --- /dev/null +++ b/net/bridge/br_mdb.c @@ -0,0 +1,163 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif + +#include "br_private.h" + +static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + struct hlist_node *n; + struct nlattr *nest; + + if (!br->multicast_router || hlist_empty(&br->router_list)) + return 0; + + nest = nla_nest_start(skb, MDBA_ROUTER); + if (nest == NULL) + return -EMSGSIZE; + + hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { + if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) + goto fail; + } + + nla_nest_end(skb, nest); + return 0; +fail: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_mdb_htable *mdb; + struct nlattr *nest, *nest2; + int i, err = 0; + int idx = 0, s_idx = cb->args[1]; + + if (br->multicast_disabled) + return 0; + + mdb = rcu_dereference(br->mdb); + if (!mdb) + return 0; + + nest = nla_nest_start(skb, MDBA_MDB); + if (nest == NULL) + return -EMSGSIZE; + + for (i = 0; i < mdb->max; i++) { + struct hlist_node *h; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p, **pp; + struct net_bridge_port *port; + + hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) { + if (idx < s_idx) + goto skip; + + nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) { + err = -EMSGSIZE; + goto out; + } + + for (pp = &mp->ports; + (p = rcu_dereference(*pp)) != NULL; + pp = &p->next) { + port = p->port; + if (port) { + struct br_mdb_entry e; + e.ifindex = port->dev->ifindex; + e.addr.u.ip4 = p->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + e.addr.u.ip6 = p->addr.u.ip6; +#endif + e.addr.proto = p->addr.proto; + if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { + nla_nest_cancel(skb, nest2); + err = -EMSGSIZE; + goto out; + } + } + } + nla_nest_end(skb, nest2); + skip: + idx++; + } + } + +out: + cb->args[1] = idx; + nla_nest_end(skb, nest); + return err; +} + +static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net_device *dev; + struct net *net = sock_net(skb->sk); + struct nlmsghdr *nlh = NULL; + int idx = 0, s_idx; + + s_idx = cb->args[0]; + + rcu_read_lock(); + + /* TODO: in case of rehashing, we need to check + * consistency for dumping. + */ + cb->seq = net->dev_base_seq; + + for_each_netdev_rcu(net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { + struct br_port_msg *bpm; + + if (idx < s_idx) + goto skip; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_GETMDB, + sizeof(*bpm), NLM_F_MULTI); + if (nlh == NULL) + break; + + bpm = nlmsg_data(nlh); + bpm->ifindex = dev->ifindex; + if (br_mdb_fill_info(skb, cb, dev) < 0) + goto out; + if (br_rports_fill_info(skb, cb, dev) < 0) + goto out; + + cb->args[1] = 0; + nlmsg_end(skb, nlh); + skip: + idx++; + } + } + +out: + if (nlh) + nlmsg_end(skb, nlh); + rcu_read_unlock(); + cb->args[0] = idx; + return skb->len; +} + +void br_mdb_init(void) +{ + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); +} diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a2a7a1a79081..68e375ac93bd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1605,6 +1605,7 @@ void br_multicast_init(struct net_bridge *br) br_multicast_querier_expired, (unsigned long)br); setup_timer(&br->multicast_query_timer, br_multicast_query_expired, (unsigned long)br); + br_mdb_init(); } void br_multicast_open(struct net_bridge *br) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cd86222cf5e3..ae0a6ec0a702 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -433,6 +433,7 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p, extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +extern void br_mdb_init(void); static inline bool br_multicast_is_router(struct net_bridge *br) { diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index d309e7f472d8..163aaa77d5aa 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -67,6 +67,7 @@ static struct nlmsg_perm nlmsg_route_perms[] = { RTM_GETADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETDCB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_GETMDB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; static struct nlmsg_perm nlmsg_tcpdiag_perms[] = -- cgit v1.2.3 From 2ce297fc24d1f0b70c756d1f593e7a089a2d888d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 10 Dec 2012 02:15:35 +0000 Subject: bridge: fix seq check in br_mdb_dump() In case of rehashing, introduce a global variable 'br_mdb_rehash_seq' which gets increased every time when rehashing, and assign net->dev_base_seq + br_mdb_rehash_seq to cb->seq. In theory cb->seq could be wrapped to zero, but this is not easy to fix, as net->dev_base_seq is not visible inside br_mdb_rehash(). In practice, this is rare. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Cc: Jesper Dangaard Brouer Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 6 ++---- net/bridge/br_multicast.c | 2 ++ net/bridge/br_private.h | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net/bridge/br_private.h') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index edc0d731f6b2..ccc43a9bff80 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -117,10 +117,8 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); - /* TODO: in case of rehashing, we need to check - * consistency for dumping. - */ - cb->seq = net->dev_base_seq; + /* In theory this could be wrapped to 0... */ + cb->seq = net->dev_base_seq + br_mdb_rehash_seq; for_each_netdev_rcu(net, dev) { if (dev->priv_flags & IFF_EBRIDGE) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 68e375ac93bd..847b98a1d5e0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -37,6 +37,7 @@ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) static void br_multicast_start_querier(struct net_bridge *br); +unsigned int br_mdb_rehash_seq; #if IS_ENABLED(CONFIG_IPV6) static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) @@ -338,6 +339,7 @@ static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max, return err; } + br_mdb_rehash_seq++; call_rcu_bh(&mdb->rcu, br_mdb_free); out: diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ae0a6ec0a702..f95b766c7a98 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -411,6 +411,7 @@ extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __us /* br_multicast.c */ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING +extern unsigned int br_mdb_rehash_seq; extern int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb); -- cgit v1.2.3 From 37a393bc4932d7bac360f40064aaafc01ab44901 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Dec 2012 22:23:07 +0000 Subject: bridge: notify mdb changes via netlink As Stephen mentioned, we need to monitor the mdb changes in user-space, so add notifications via netlink too. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 6 ++++ net/bridge/br_mdb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 2 ++ net/bridge/br_private.h | 2 ++ 4 files changed, 90 insertions(+) (limited to 'net/bridge/br_private.h') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 354a1e7d32a3..7a5eb196ade9 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -125,6 +125,10 @@ enum { RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB RTM_GETMDB = 86, #define RTM_GETMDB RTM_GETMDB @@ -607,6 +611,8 @@ enum rtnetlink_groups { #define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF RTNLGRP_IPV6_NETCONF, #define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index ccc43a9bff80..a8cfbf5f3c68 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -155,6 +155,86 @@ out: return skb->len; } +static int nlmsg_populate_mdb_fill(struct sk_buff *skb, + struct net_device *dev, + struct br_mdb_entry *entry, u32 pid, + u32 seq, int type, unsigned int flags) +{ + struct nlmsghdr *nlh; + struct br_port_msg *bpm; + struct nlattr *nest, *nest2; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + bpm->family = AF_BRIDGE; + bpm->ifindex = dev->ifindex; + nest = nla_nest_start(skb, MDBA_MDB); + if (nest == NULL) + goto cancel; + nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) + goto end; + + if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry)) + goto end; + + nla_nest_end(skb, nest2); + nla_nest_end(skb, nest); + return nlmsg_end(skb, nlh); + +end: + nla_nest_end(skb, nest); +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t rtnl_mdb_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct br_port_msg)) + + nla_total_size(sizeof(struct br_mdb_entry)); +} + +static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, + int type) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); + if (!skb) + goto errout; + + err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_MDB, err); +} + +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type) +{ + struct br_mdb_entry entry; + + entry.ifindex = port->dev->ifindex; + entry.addr.proto = group->proto; + entry.addr.u.ip4 = group->u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + entry.addr.u.ip6 = group->u.ip6; +#endif + __br_mdb_notify(dev, &entry, type); +} + void br_mdb_init(void) { rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 847b98a1d5e0..d929586ce39e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -681,6 +681,7 @@ static int br_multicast_add_group(struct net_bridge *br, (unsigned long)p); rcu_assign_pointer(*pp, p); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); found: mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -1240,6 +1241,7 @@ static void br_multicast_leave_group(struct net_bridge *br, hlist_del_init(&p->mglist); del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB); if (!mp->ports && !mp->mglist && netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f95b766c7a98..2807c7680c38 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -435,6 +435,8 @@ extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); extern void br_mdb_init(void); +extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type); static inline bool br_multicast_is_router(struct net_bridge *br) { -- cgit v1.2.3 From cfd567543590f71ca0af397437e2554f9756d750 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Dec 2012 22:23:08 +0000 Subject: bridge: add support of adding and deleting mdb entries This patch implents adding/deleting mdb entries via netlink. Currently all entries are temp, we probably need a flag to distinguish permanent entries too. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 8 ++ net/bridge/br_mdb.c | 240 +++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 55 +++++----- net/bridge/br_private.h | 23 ++++ 4 files changed, 297 insertions(+), 29 deletions(-) (limited to 'net/bridge/br_private.h') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 9a0f6ff0d7e7..afbb18a0227c 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -157,6 +157,7 @@ enum { #define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) struct br_port_msg { + __u8 family; __u32 ifindex; }; @@ -171,4 +172,11 @@ struct br_mdb_entry { } addr; }; +enum { + MDBA_SET_ENTRY_UNSPEC, + MDBA_SET_ENTRY, + __MDBA_SET_ENTRY_MAX, +}; +#define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1) + #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index a8cfbf5f3c68..6f0a2eebcb27 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #if IS_ENABLED(CONFIG_IPV6) @@ -235,7 +236,246 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, __br_mdb_notify(dev, &entry, type); } +static bool is_valid_mdb_entry(struct br_mdb_entry *entry) +{ + if (entry->ifindex == 0) + return false; + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4)) + return false; + if (ipv4_is_local_multicast(entry->addr.u.ip4)) + return false; +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (!ipv6_is_transient_multicast(&entry->addr.u.ip6)) + return false; +#endif + } else + return false; + + return true; +} + +static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, + struct net_device **pdev, struct br_mdb_entry **pentry) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct br_port_msg *bpm; + struct nlattr *tb[MDBA_SET_ENTRY_MAX+1]; + struct net_device *dev; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); + if (err < 0) + return err; + + bpm = nlmsg_data(nlh); + if (bpm->ifindex == 0) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (dev == NULL) { + pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n"); + return -ENODEV; + } + + if (!(dev->priv_flags & IFF_EBRIDGE)) { + pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n"); + return -EOPNOTSUPP; + } + + *pdev = dev; + + if (!tb[MDBA_SET_ENTRY] || + nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n"); + return -EINVAL; + } + + entry = nla_data(tb[MDBA_SET_ENTRY]); + if (!is_valid_mdb_entry(entry)) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n"); + return -EINVAL; + } + + *pentry = entry; + return 0; +} + +static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_mdb_htable *mdb; + int err; + + mdb = mlock_dereference(br->mdb, br); + mp = br_mdb_ip_get(mdb, group); + if (!mp) { + mp = br_multicast_new_group(br, port, group); + err = PTR_ERR(mp); + if (IS_ERR(mp)) + return err; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port == port) + return -EEXIST; + if ((unsigned long)p->port < (unsigned long)port) + break; + } + + p = br_multicast_new_port_group(port, group, *pp); + if (unlikely(!p)) + return -ENOMEM; + rcu_assign_pointer(*pp, p); + + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + return 0; +} + +static int __br_mdb_add(struct net *net, struct net_bridge *br, + struct br_mdb_entry *entry) +{ + struct br_ip ip; + struct net_device *dev; + struct net_bridge_port *p; + int ret; + + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + + dev = __dev_get_by_index(net, entry->ifindex); + if (!dev) + return -ENODEV; + + p = br_port_get_rtnl(dev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) + ip.u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip.u.ip6 = entry->addr.u.ip6; +#endif + + spin_lock_bh(&br->multicast_lock); + ret = br_mdb_add_group(br, p, &ip); + spin_unlock_bh(&br->multicast_lock); + return ret; +} + +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct net_device *dev; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry); + if (err < 0) + return err; + + br = netdev_priv(dev); + + err = __br_mdb_add(net, br, entry); + if (!err) + __br_mdb_notify(dev, entry, RTM_NEWMDB); + return err; +} + +static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip ip; + int err = -EINVAL; + + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + + if (timer_pending(&br->multicast_querier_timer)) + return -EBUSY; + + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) + ip.u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip.u.ip6 = entry->addr.u.ip6; +#endif + + spin_lock_bh(&br->multicast_lock); + mdb = mlock_dereference(br->mdb, br); + + mp = br_mdb_ip_get(mdb, &ip); + if (!mp) + goto unlock; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (!p->port || p->port->dev->ifindex != entry->ifindex) + continue; + + if (p->port->state == BR_STATE_DISABLED) + goto unlock; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + err = 0; + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + break; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + return err; +} + +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net_device *dev; + struct br_mdb_entry *entry; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry); + if (err < 0) + return err; + + br = netdev_priv(dev); + + err = __br_mdb_del(br, entry); + if (!err) + __br_mdb_notify(dev, entry, RTM_DELMDB); + return err; +} + void br_mdb_init(void) { rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d929586ce39e..977c3ee02e65 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -27,27 +27,14 @@ #if IS_ENABLED(CONFIG_IPV6) #include #include -#include #include #endif #include "br_private.h" -#define mlock_dereference(X, br) \ - rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) - static void br_multicast_start_querier(struct net_bridge *br); unsigned int br_mdb_rehash_seq; -#if IS_ENABLED(CONFIG_IPV6) -static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) -{ - if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) - return 1; - return 0; -} -#endif - static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) @@ -104,8 +91,8 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( return NULL; } -static struct net_bridge_mdb_entry *br_mdb_ip_get( - struct net_bridge_mdb_htable *mdb, struct br_ip *dst) +struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, + struct br_ip *dst) { if (!mdb) return NULL; @@ -208,7 +195,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, return maxlen > elasticity ? -EINVAL : 0; } -static void br_multicast_free_pg(struct rcu_head *head) +void br_multicast_free_pg(struct rcu_head *head) { struct net_bridge_port_group *p = container_of(head, struct net_bridge_port_group, rcu); @@ -584,9 +571,8 @@ err: return mp; } -static struct net_bridge_mdb_entry *br_multicast_new_group( - struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) +struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, + struct net_bridge_port *port, struct br_ip *group) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -633,6 +619,26 @@ out: return mp; } +struct net_bridge_port_group *br_multicast_new_port_group( + struct net_bridge_port *port, + struct br_ip *group, + struct net_bridge_port_group *next) +{ + struct net_bridge_port_group *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + if (unlikely(!p)) + return NULL; + + p->addr = *group; + p->port = port; + p->next = next; + hlist_add_head(&p->mglist, &port->mglist); + setup_timer(&p->timer, br_multicast_port_group_expired, + (unsigned long)p); + return p; +} + static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) @@ -668,18 +674,9 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = kzalloc(sizeof(*p), GFP_ATOMIC); - err = -ENOMEM; + p = br_multicast_new_port_group(port, group, *pp); if (unlikely(!p)) goto err; - - p->addr = *group; - p->port = port; - p->next = *pp; - hlist_add_head(&p->mglist, &port->mglist); - setup_timer(&p->timer, br_multicast_port_group_expired, - (unsigned long)p); - rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2807c7680c38..f21a739a6186 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -434,10 +434,33 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p, extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +extern struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, + struct br_ip *dst); +extern struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, + struct net_bridge_port *port, struct br_ip *group); +extern void br_multicast_free_pg(struct rcu_head *head); +extern struct net_bridge_port_group *br_multicast_new_port_group( + struct net_bridge_port *port, + struct br_ip *group, + struct net_bridge_port_group *next); extern void br_mdb_init(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); +#define mlock_dereference(X, br) \ + rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) + +#if IS_ENABLED(CONFIG_IPV6) +#include +static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) +{ + if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) + return 1; + return 0; +} +#endif + static inline bool br_multicast_is_router(struct net_bridge *br) { return br->multicast_router == 2 || -- cgit v1.2.3 From ccb1c31a7a8744cd153a7d92b726a56b56ad61d3 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 14 Dec 2012 22:09:51 +0000 Subject: bridge: add flags to distinguish permanent mdb entires This patch adds a flag to each mdb entry, so that we can distinguish permanent entries with temporary entries. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 3 +++ net/bridge/br_mdb.c | 9 ++++++--- net/bridge/br_multicast.c | 8 +++++--- net/bridge/br_private.h | 4 +++- 4 files changed, 17 insertions(+), 7 deletions(-) (limited to 'net/bridge/br_private.h') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index afbb18a0227c..5db297514aec 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -163,6 +163,9 @@ struct br_port_msg { struct br_mdb_entry { __u32 ifindex; +#define MDB_TEMPORARY 0 +#define MDB_PERMANENT 1 + __u8 state; struct { union { __be32 ip4; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 6f0a2eebcb27..9cf5d2b28c76 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -83,6 +83,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (port) { struct br_mdb_entry e; e.ifindex = port->dev->ifindex; + e.state = p->state; e.addr.u.ip4 = p->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) e.addr.u.ip6 = p->addr.u.ip6; @@ -253,6 +254,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry) #endif } else return false; + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) + return false; return true; } @@ -310,7 +313,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) + struct br_ip *group, unsigned char state) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -336,7 +339,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, state); if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); @@ -373,7 +376,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, #endif spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip); + ret = br_mdb_add_group(br, p, &ip, entry->state); spin_unlock_bh(&br->multicast_lock); return ret; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2561af9d18a2..dce9defae3c6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -279,7 +279,7 @@ static void br_multicast_port_group_expired(unsigned long data) spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || - hlist_unhashed(&pg->mglist)) + hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) goto out; br_multicast_del_pg(br, pg); @@ -622,7 +622,8 @@ out: struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group __rcu *next) + struct net_bridge_port_group __rcu *next, + unsigned char state) { struct net_bridge_port_group *p; @@ -632,6 +633,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->addr = *group; p->port = port; + p->state = state; rcu_assign_pointer(p->next, next); hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, @@ -674,7 +676,7 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f21a739a6186..49b85af44016 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -83,6 +83,7 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; + unsigned char state; }; struct net_bridge_mdb_entry @@ -443,7 +444,8 @@ extern void br_multicast_free_pg(struct rcu_head *head); extern struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group *next); + struct net_bridge_port_group *next, + unsigned char state); extern void br_mdb_init(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); -- cgit v1.2.3 From 63233159fd4e596568f5f168ecb0879b61631d47 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 19 Dec 2012 09:13:48 +0000 Subject: bridge: Do not unregister all PF_BRIDGE rtnl operations Bridge fdb and link rtnl operations are registered in core/rtnetlink. Bridge mdb operations are registred in bridge/mdb. When removing bridge module, do not unregister ALL PF_BRIDGE ops since that would remove the ops from rtnetlink as well. Do remove mdb ops when bridge is destroyed. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 7 +++++++ net/bridge/br_multicast.c | 1 + net/bridge/br_netlink.c | 1 - net/bridge/br_private.h | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net/bridge/br_private.h') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 9cf5d2b28c76..3e05cc32da50 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -482,3 +482,10 @@ void br_mdb_init(void) rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); } + +void br_mdb_uninit(void) +{ + rtnl_unregister(PF_BRIDGE, RTM_GETMDB); + rtnl_unregister(PF_BRIDGE, RTM_NEWMDB); + rtnl_unregister(PF_BRIDGE, RTM_DELMDB); +} diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index dce9defae3c6..5391ca43336a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1633,6 +1633,7 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->multicast_querier_timer); del_timer_sync(&br->multicast_query_timer); + br_mdb_uninit(); spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); if (!mdb) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index dead9dfe865b..97ba0189c6f7 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -305,5 +305,4 @@ int __init br_netlink_init(void) void __exit br_netlink_fini(void) { rtnl_link_unregister(&br_link_ops); - rtnl_unregister_all(PF_BRIDGE); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 49b85af44016..8d83be5ffedc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -447,6 +447,7 @@ extern struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port_group *next, unsigned char state); extern void br_mdb_init(void); +extern void br_mdb_uninit(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); -- cgit v1.2.3 From fdb184d1467f9ed5b17c553b85eb1bb72bdbf62f Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 3 Jan 2013 13:30:43 +0200 Subject: bridge: add empty br_mdb_init() and br_mdb_uninit() definitions. This patch adds empty br_mdb_init() and br_mdb_uninit() definitions in br_private.h to avoid build failure when CONFIG_BRIDGE_IGMP_SNOOPING is not set. These methods were moved from br_multicast.c to br_netlink.c by commit 3ec8e9f085bcaef0de1077f555c2c5102c223390 Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/bridge/br_private.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/bridge/br_private.h') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8d83be5ffedc..711094aed41a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -526,6 +526,12 @@ static inline bool br_multicast_is_router(struct net_bridge *br) { return 0; } +static inline void br_mdb_init(void) +{ +} +static inline void br_mdb_uninit(void) +{ +} #endif /* br_netfilter.c */ -- cgit v1.2.3