diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/bridge | |
download | lwn-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz lwn-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'net/bridge')
40 files changed, 8982 insertions, 0 deletions
diff --git a/net/bridge/Makefile b/net/bridge/Makefile new file mode 100644 index 000000000000..59556e40e143 --- /dev/null +++ b/net/bridge/Makefile @@ -0,0 +1,15 @@ +# +# Makefile for the IEEE 802.1d ethernet bridging layer. +# + +obj-$(CONFIG_BRIDGE) += bridge.o + +bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ + br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \ + br_stp_if.o br_stp_timer.o + +bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o + +bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o + +obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br.c b/net/bridge/br.c new file mode 100644 index 000000000000..f8f184942aaf --- /dev/null +++ b/net/bridge/br.c @@ -0,0 +1,69 @@ +/* + * Generic parts + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br.c,v 1.47 2001/12/24 00:56:41 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/init.h> + +#include "br_private.h" + +int (*br_should_route_hook) (struct sk_buff **pskb) = NULL; + +static int __init br_init(void) +{ + br_fdb_init(); + +#ifdef CONFIG_BRIDGE_NETFILTER + if (br_netfilter_init()) + return 1; +#endif + brioctl_set(br_ioctl_deviceless_stub); + br_handle_frame_hook = br_handle_frame; + + br_fdb_get_hook = br_fdb_get; + br_fdb_put_hook = br_fdb_put; + + register_netdevice_notifier(&br_device_notifier); + + return 0; +} + +static void __exit br_deinit(void) +{ +#ifdef CONFIG_BRIDGE_NETFILTER + br_netfilter_fini(); +#endif + unregister_netdevice_notifier(&br_device_notifier); + brioctl_set(NULL); + + br_cleanup_bridges(); + + synchronize_net(); + + br_fdb_get_hook = NULL; + br_fdb_put_hook = NULL; + + br_handle_frame_hook = NULL; + br_fdb_fini(); +} + +EXPORT_SYMBOL(br_should_route_hook); + +module_init(br_init) +module_exit(br_deinit) +MODULE_LICENSE("GPL"); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c new file mode 100644 index 000000000000..d9b72fde433c --- /dev/null +++ b/net/bridge/br_device.c @@ -0,0 +1,104 @@ +/* + * Device handling code + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_device.c,v 1.6 2001/12/24 00:59:55 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/module.h> +#include <asm/uaccess.h> +#include "br_private.h" + +static struct net_device_stats *br_dev_get_stats(struct net_device *dev) +{ + struct net_bridge *br; + + br = dev->priv; + + return &br->statistics; +} + +int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + const unsigned char *dest = skb->data; + struct net_bridge_fdb_entry *dst; + + br->statistics.tx_packets++; + br->statistics.tx_bytes += skb->len; + + skb->mac.raw = skb->data; + skb_pull(skb, ETH_HLEN); + + rcu_read_lock(); + if (dest[0] & 1) + br_flood_deliver(br, skb, 0); + else if ((dst = __br_fdb_get(br, dest)) != NULL) + br_deliver(dst->dst, skb); + else + br_flood_deliver(br, skb, 0); + + rcu_read_unlock(); + return 0; +} + +static int br_dev_open(struct net_device *dev) +{ + netif_start_queue(dev); + + br_stp_enable_bridge(dev->priv); + + return 0; +} + +static void br_dev_set_multicast_list(struct net_device *dev) +{ +} + +static int br_dev_stop(struct net_device *dev) +{ + br_stp_disable_bridge(dev->priv); + + netif_stop_queue(dev); + + return 0; +} + +static int br_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < 68) || new_mtu > br_min_mtu(dev->priv)) + return -EINVAL; + + dev->mtu = new_mtu; + return 0; +} + +void br_dev_setup(struct net_device *dev) +{ + memset(dev->dev_addr, 0, ETH_ALEN); + + ether_setup(dev); + + dev->do_ioctl = br_dev_ioctl; + dev->get_stats = br_dev_get_stats; + dev->hard_start_xmit = br_dev_xmit; + dev->open = br_dev_open; + dev->set_multicast_list = br_dev_set_multicast_list; + dev->change_mtu = br_change_mtu; + dev->destructor = free_netdev; + SET_MODULE_OWNER(dev); + dev->stop = br_dev_stop; + dev->tx_queue_len = 0; + dev->set_mac_address = NULL; + dev->priv_flags = IFF_EBRIDGE; +} diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c new file mode 100644 index 000000000000..e6c2200b7ca3 --- /dev/null +++ 
b/net/bridge/br_fdb.c @@ -0,0 +1,368 @@ +/* + * Forwarding database + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_fdb.c,v 1.6 2002/01/17 00:57:07 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/times.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/jhash.h> +#include <asm/atomic.h> +#include "br_private.h" + +static kmem_cache_t *br_fdb_cache; +static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr); + +void __init br_fdb_init(void) +{ + br_fdb_cache = kmem_cache_create("bridge_fdb_cache", + sizeof(struct net_bridge_fdb_entry), + 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); +} + +void __exit br_fdb_fini(void) +{ + kmem_cache_destroy(br_fdb_cache); +} + + +/* if topology_changing then use forward_delay (default 15 sec) + * otherwise keep longer (default 5 minutes) + */ +static __inline__ unsigned long hold_time(const struct net_bridge *br) +{ + return br->topology_change ? 
br->forward_delay : br->ageing_time; +} + +static __inline__ int has_expired(const struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb) +{ + return !fdb->is_static + && time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); +} + +static __inline__ int br_mac_hash(const unsigned char *mac) +{ + return jhash(mac, ETH_ALEN, 0) & (BR_HASH_SIZE - 1); +} + +static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f) +{ + hlist_del_rcu(&f->hlist); + br_fdb_put(f); +} + +void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) +{ + struct net_bridge *br = p->br; + int i; + + spin_lock_bh(&br->hash_lock); + + /* Search all chains since old address/hash is unknown */ + for (i = 0; i < BR_HASH_SIZE; i++) { + struct hlist_node *h; + hlist_for_each(h, &br->hash[i]) { + struct net_bridge_fdb_entry *f; + + f = hlist_entry(h, struct net_bridge_fdb_entry, hlist); + if (f->dst == p && f->is_local) { + /* maybe another port has same hw addr? */ + struct net_bridge_port *op; + list_for_each_entry(op, &br->port_list, list) { + if (op != p && + !memcmp(op->dev->dev_addr, + f->addr.addr, ETH_ALEN)) { + f->dst = op; + goto insert; + } + } + + /* delete old one */ + fdb_delete(f); + goto insert; + } + } + } + insert: + /* insert new address, may fail if invalid address or dup. 
*/ + fdb_insert(br, p, newaddr); + + spin_unlock_bh(&br->hash_lock); +} + +void br_fdb_cleanup(unsigned long _data) +{ + struct net_bridge *br = (struct net_bridge *)_data; + unsigned long delay = hold_time(br); + int i; + + spin_lock_bh(&br->hash_lock); + for (i = 0; i < BR_HASH_SIZE; i++) { + struct net_bridge_fdb_entry *f; + struct hlist_node *h, *n; + + hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { + if (!f->is_static && + time_before_eq(f->ageing_timer + delay, jiffies)) + fdb_delete(f); + } + } + spin_unlock_bh(&br->hash_lock); + + mod_timer(&br->gc_timer, jiffies + HZ/10); +} + +void br_fdb_delete_by_port(struct net_bridge *br, struct net_bridge_port *p) +{ + int i; + + spin_lock_bh(&br->hash_lock); + for (i = 0; i < BR_HASH_SIZE; i++) { + struct hlist_node *h, *g; + + hlist_for_each_safe(h, g, &br->hash[i]) { + struct net_bridge_fdb_entry *f + = hlist_entry(h, struct net_bridge_fdb_entry, hlist); + if (f->dst != p) + continue; + + /* + * if multiple ports all have the same device address + * then when one port is deleted, assign + * the local entry to other port + */ + if (f->is_local) { + struct net_bridge_port *op; + list_for_each_entry(op, &br->port_list, list) { + if (op != p && + !memcmp(op->dev->dev_addr, + f->addr.addr, ETH_ALEN)) { + f->dst = op; + goto skip_delete; + } + } + } + + fdb_delete(f); + skip_delete: ; + } + } + spin_unlock_bh(&br->hash_lock); +} + +/* No locking or refcounting, assumes caller has no preempt (rcu_read_lock) */ +struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, + const unsigned char *addr) +{ + struct hlist_node *h; + struct net_bridge_fdb_entry *fdb; + + hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) { + if (!memcmp(fdb->addr.addr, addr, ETH_ALEN)) { + if (unlikely(has_expired(br, fdb))) + break; + return fdb; + } + } + + return NULL; +} + +/* Interface used by ATM hook that keeps a ref count */ +struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br, + unsigned 
char *addr) +{ + struct net_bridge_fdb_entry *fdb; + + rcu_read_lock(); + fdb = __br_fdb_get(br, addr); + if (fdb) + atomic_inc(&fdb->use_count); + rcu_read_unlock(); + return fdb; +} + +static void fdb_rcu_free(struct rcu_head *head) +{ + struct net_bridge_fdb_entry *ent + = container_of(head, struct net_bridge_fdb_entry, rcu); + kmem_cache_free(br_fdb_cache, ent); +} + +/* Set entry up for deletion with RCU */ +void br_fdb_put(struct net_bridge_fdb_entry *ent) +{ + if (atomic_dec_and_test(&ent->use_count)) + call_rcu(&ent->rcu, fdb_rcu_free); +} + +/* + * Fill buffer with forwarding table records in + * the API format. + */ +int br_fdb_fillbuf(struct net_bridge *br, void *buf, + unsigned long maxnum, unsigned long skip) +{ + struct __fdb_entry *fe = buf; + int i, num = 0; + struct hlist_node *h; + struct net_bridge_fdb_entry *f; + + memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); + + rcu_read_lock(); + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + if (num >= maxnum) + goto out; + + if (has_expired(br, f)) + continue; + + if (skip) { + --skip; + continue; + } + + /* convert from internal format to API */ + memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN); + fe->port_no = f->dst->port_no; + fe->is_local = f->is_local; + if (!f->is_static) + fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->ageing_timer); + ++fe; + ++num; + } + } + + out: + rcu_read_unlock(); + + return num; +} + +static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, + const unsigned char *addr) +{ + struct hlist_node *h; + struct net_bridge_fdb_entry *fdb; + + hlist_for_each_entry_rcu(fdb, h, head, hlist) { + if (!memcmp(fdb->addr.addr, addr, ETH_ALEN)) + return fdb; + } + return NULL; +} + +static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, + struct net_bridge_port *source, + const unsigned char *addr, + int is_local) +{ + struct net_bridge_fdb_entry *fdb; + + fdb = kmem_cache_alloc(br_fdb_cache, 
GFP_ATOMIC); + if (fdb) { + memcpy(fdb->addr.addr, addr, ETH_ALEN); + atomic_set(&fdb->use_count, 1); + hlist_add_head_rcu(&fdb->hlist, head); + + fdb->dst = source; + fdb->is_local = is_local; + fdb->is_static = is_local; + fdb->ageing_timer = jiffies; + } + return fdb; +} + +static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct net_bridge_fdb_entry *fdb; + + if (!is_valid_ether_addr(addr)) + return -EINVAL; + + fdb = fdb_find(head, addr); + if (fdb) { + /* it is okay to have multiple ports with same + * address, just use the first one. + */ + if (fdb->is_local) + return 0; + + printk(KERN_WARNING "%s adding interface with same address " + "as a received packet\n", + source->dev->name); + fdb_delete(fdb); + } + + if (!fdb_create(head, source, addr, 1)) + return -ENOMEM; + + return 0; +} + +int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr) +{ + int ret; + + spin_lock_bh(&br->hash_lock); + ret = fdb_insert(br, source, addr); + spin_unlock_bh(&br->hash_lock); + return ret; +} + +void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct net_bridge_fdb_entry *fdb; + + /* some users want to always flood. 
*/ + if (hold_time(br) == 0) + return; + + rcu_read_lock(); + fdb = fdb_find(head, addr); + if (likely(fdb)) { + /* attempt to update an entry for a local interface */ + if (unlikely(fdb->is_local)) { + if (net_ratelimit()) + printk(KERN_WARNING "%s: received packet with " + " own address as source address\n", + source->dev->name); + } else { + /* fastpath: update of existing entry */ + fdb->dst = source; + fdb->ageing_timer = jiffies; + } + } else { + spin_lock_bh(&br->hash_lock); + if (!fdb_find(head, addr)) + fdb_create(head, source, addr, 0); + /* else we lose race and someone else inserts + * it first, don't bother updating + */ + spin_unlock_bh(&br->hash_lock); + } + rcu_read_unlock(); +} diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c new file mode 100644 index 000000000000..ef9f2095f96e --- /dev/null +++ b/net/bridge/br_forward.c @@ -0,0 +1,159 @@ +/* + * Forwarding decision + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_forward.c,v 1.4 2001/08/14 22:05:57 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/netfilter_bridge.h> +#include "br_private.h" + +static inline int should_deliver(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + if (skb->dev == p->dev || + p->state != BR_STATE_FORWARDING) + return 0; + + return 1; +} + +int br_dev_queue_push_xmit(struct sk_buff *skb) +{ + if (skb->len > skb->dev->mtu) + kfree_skb(skb); + else { +#ifdef CONFIG_BRIDGE_NETFILTER + /* ip_refrag calls ip_fragment, doesn't copy the MAC header. 
*/ + nf_bridge_maybe_copy_header(skb); +#endif + skb_push(skb, ETH_HLEN); + + dev_queue_xmit(skb); + } + + return 0; +} + +int br_forward_finish(struct sk_buff *skb) +{ + NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, + br_dev_queue_push_xmit); + + return 0; +} + +static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) +{ + skb->dev = to->dev; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif + NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + br_forward_finish); +} + +static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) +{ + struct net_device *indev; + + indev = skb->dev; + skb->dev = to->dev; + skb->ip_summed = CHECKSUM_NONE; + + NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, + br_forward_finish); +} + +/* called with rcu_read_lock */ +void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) +{ + if (should_deliver(to, skb)) { + __br_deliver(to, skb); + return; + } + + kfree_skb(skb); +} + +/* called with rcu_read_lock */ +void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) +{ + if (should_deliver(to, skb)) { + __br_forward(to, skb); + return; + } + + kfree_skb(skb); +} + +/* called under bridge lock */ +static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)) +{ + struct net_bridge_port *p; + struct net_bridge_port *prev; + + if (clone) { + struct sk_buff *skb2; + + if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { + br->statistics.tx_dropped++; + return; + } + + skb = skb2; + } + + prev = NULL; + + list_for_each_entry_rcu(p, &br->port_list, list) { + if (should_deliver(p, skb)) { + if (prev != NULL) { + struct sk_buff *skb2; + + if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { + br->statistics.tx_dropped++; + kfree_skb(skb); + return; + } + + __packet_hook(prev, skb2); + } + + prev = p; + } + } + + if (prev != NULL) { + 
__packet_hook(prev, skb); + return; + } + + kfree_skb(skb); +} + + +/* called with rcu_read_lock */ +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone) +{ + br_flood(br, skb, clone, __br_deliver); +} + +/* called under bridge lock */ +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone) +{ + br_flood(br, skb, clone, __br_forward); +} diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c new file mode 100644 index 000000000000..69872bf3b87e --- /dev/null +++ b/net/bridge/br_if.c @@ -0,0 +1,388 @@ +/* + * Userspace interface + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_if.c,v 1.7 2001/12/24 00:59:55 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/ethtool.h> +#include <linux/if_arp.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rtnetlink.h> +#include <net/sock.h> + +#include "br_private.h" + +/* + * Determine initial path cost based on speed. + * using recommendations from 802.1d standard + * + * Need to simulate user ioctl because not all device's that support + * ethtool, use ethtool_ops. Also, since driver might sleep need to + * not be holding any locks. 
+ */ +static int br_initial_port_cost(struct net_device *dev) +{ + + struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + struct ifreq ifr; + mm_segment_t old_fs; + int err; + + strncpy(ifr.ifr_name, dev->name, IFNAMSIZ); + ifr.ifr_data = (void __user *) &ecmd; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = dev_ethtool(&ifr); + set_fs(old_fs); + + if (!err) { + switch(ecmd.speed) { + case SPEED_100: + return 19; + case SPEED_1000: + return 4; + case SPEED_10000: + return 2; + case SPEED_10: + return 100; + default: + pr_info("bridge: can't decode speed from %s: %d\n", + dev->name, ecmd.speed); + return 100; + } + } + + /* Old silly heuristics based on name */ + if (!strncmp(dev->name, "lec", 3)) + return 7; + + if (!strncmp(dev->name, "plip", 4)) + return 2500; + + return 100; /* assume old 10Mbps */ +} + +static void destroy_nbp(struct net_bridge_port *p) +{ + struct net_device *dev = p->dev; + + dev->br_port = NULL; + p->br = NULL; + p->dev = NULL; + dev_put(dev); + + br_sysfs_freeif(p); +} + +static void destroy_nbp_rcu(struct rcu_head *head) +{ + struct net_bridge_port *p = + container_of(head, struct net_bridge_port, rcu); + destroy_nbp(p); +} + +/* called with RTNL */ +static void del_nbp(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + struct net_device *dev = p->dev; + + dev_set_promiscuity(dev, -1); + + spin_lock_bh(&br->lock); + br_stp_disable_port(p); + spin_unlock_bh(&br->lock); + + br_fdb_delete_by_port(br, p); + + list_del_rcu(&p->list); + + del_timer_sync(&p->message_age_timer); + del_timer_sync(&p->forward_delay_timer); + del_timer_sync(&p->hold_timer); + + call_rcu(&p->rcu, destroy_nbp_rcu); +} + +/* called with RTNL */ +static void del_br(struct net_bridge *br) +{ + struct net_bridge_port *p, *n; + + list_for_each_entry_safe(p, n, &br->port_list, list) { + br_sysfs_removeif(p); + del_nbp(p); + } + + del_timer_sync(&br->gc_timer); + + br_sysfs_delbr(br->dev); + unregister_netdevice(br->dev); +} + +static struct net_device 
*new_bridge_dev(const char *name) +{ + struct net_bridge *br; + struct net_device *dev; + + dev = alloc_netdev(sizeof(struct net_bridge), name, + br_dev_setup); + + if (!dev) + return NULL; + + br = netdev_priv(dev); + br->dev = dev; + + spin_lock_init(&br->lock); + INIT_LIST_HEAD(&br->port_list); + spin_lock_init(&br->hash_lock); + + br->bridge_id.prio[0] = 0x80; + br->bridge_id.prio[1] = 0x00; + memset(br->bridge_id.addr, 0, ETH_ALEN); + + br->stp_enabled = 0; + br->designated_root = br->bridge_id; + br->root_path_cost = 0; + br->root_port = 0; + br->bridge_max_age = br->max_age = 20 * HZ; + br->bridge_hello_time = br->hello_time = 2 * HZ; + br->bridge_forward_delay = br->forward_delay = 15 * HZ; + br->topology_change = 0; + br->topology_change_detected = 0; + br->ageing_time = 300 * HZ; + INIT_LIST_HEAD(&br->age_list); + + br_stp_timer_init(br); + + return dev; +} + +/* find an available port number */ +static int find_portno(struct net_bridge *br) +{ + int index; + struct net_bridge_port *p; + unsigned long *inuse; + + inuse = kmalloc(BITS_TO_LONGS(BR_MAX_PORTS)*sizeof(unsigned long), + GFP_KERNEL); + if (!inuse) + return -ENOMEM; + + memset(inuse, 0, BITS_TO_LONGS(BR_MAX_PORTS)*sizeof(unsigned long)); + set_bit(0, inuse); /* zero is reserved */ + list_for_each_entry(p, &br->port_list, list) { + set_bit(p->port_no, inuse); + } + index = find_first_zero_bit(inuse, BR_MAX_PORTS); + kfree(inuse); + + return (index >= BR_MAX_PORTS) ? 
-EXFULL : index; +} + +/* called with RTNL */ +static struct net_bridge_port *new_nbp(struct net_bridge *br, + struct net_device *dev, + unsigned long cost) +{ + int index; + struct net_bridge_port *p; + + index = find_portno(br); + if (index < 0) + return ERR_PTR(index); + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return ERR_PTR(-ENOMEM); + + memset(p, 0, sizeof(*p)); + p->br = br; + dev_hold(dev); + p->dev = dev; + p->path_cost = cost; + p->priority = 0x8000 >> BR_PORT_BITS; + dev->br_port = p; + p->port_no = index; + br_init_port(p); + p->state = BR_STATE_DISABLED; + kobject_init(&p->kobj); + + return p; +} + +int br_add_bridge(const char *name) +{ + struct net_device *dev; + int ret; + + dev = new_bridge_dev(name); + if (!dev) + return -ENOMEM; + + rtnl_lock(); + if (strchr(dev->name, '%')) { + ret = dev_alloc_name(dev, dev->name); + if (ret < 0) + goto err1; + } + + ret = register_netdevice(dev); + if (ret) + goto err2; + + /* network device kobject is not setup until + * after rtnl_unlock does it's hotplug magic. + * so hold reference to avoid race. + */ + dev_hold(dev); + rtnl_unlock(); + + ret = br_sysfs_addbr(dev); + dev_put(dev); + + if (ret) + unregister_netdev(dev); + out: + return ret; + + err2: + free_netdev(dev); + err1: + rtnl_unlock(); + goto out; +} + +int br_del_bridge(const char *name) +{ + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + dev = __dev_get_by_name(name); + if (dev == NULL) + ret = -ENXIO; /* Could not find device */ + + else if (!(dev->priv_flags & IFF_EBRIDGE)) { + /* Attempt to delete non bridge device! */ + ret = -EPERM; + } + + else if (dev->flags & IFF_UP) { + /* Not shutdown yet. 
*/ + ret = -EBUSY; + } + + else + del_br(netdev_priv(dev)); + + rtnl_unlock(); + return ret; +} + +/* Mtu of the bridge pseudo-device 1500 or the minimum of the ports */ +int br_min_mtu(const struct net_bridge *br) +{ + const struct net_bridge_port *p; + int mtu = 0; + + ASSERT_RTNL(); + + if (list_empty(&br->port_list)) + mtu = 1500; + else { + list_for_each_entry(p, &br->port_list, list) { + if (!mtu || p->dev->mtu < mtu) + mtu = p->dev->mtu; + } + } + return mtu; +} + +/* called with RTNL */ +int br_add_if(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + int err = 0; + + if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) + return -EINVAL; + + if (dev->hard_start_xmit == br_dev_xmit) + return -ELOOP; + + if (dev->br_port != NULL) + return -EBUSY; + + if (IS_ERR(p = new_nbp(br, dev, br_initial_port_cost(dev)))) + return PTR_ERR(p); + + if ((err = br_fdb_insert(br, p, dev->dev_addr))) + destroy_nbp(p); + + else if ((err = br_sysfs_addif(p))) + del_nbp(p); + else { + dev_set_promiscuity(dev, 1); + + list_add_rcu(&p->list, &br->port_list); + + spin_lock_bh(&br->lock); + br_stp_recalculate_bridge_id(br); + if ((br->dev->flags & IFF_UP) + && (dev->flags & IFF_UP) && netif_carrier_ok(dev)) + br_stp_enable_port(p); + spin_unlock_bh(&br->lock); + + dev_set_mtu(br->dev, br_min_mtu(br)); + } + + return err; +} + +/* called with RTNL */ +int br_del_if(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p = dev->br_port; + + if (!p || p->br != br) + return -EINVAL; + + br_sysfs_removeif(p); + del_nbp(p); + + spin_lock_bh(&br->lock); + br_stp_recalculate_bridge_id(br); + spin_unlock_bh(&br->lock); + + return 0; +} + +void __exit br_cleanup_bridges(void) +{ + struct net_device *dev, *nxt; + + rtnl_lock(); + for (dev = dev_base; dev; dev = nxt) { + nxt = dev->next; + if (dev->priv_flags & IFF_EBRIDGE) + del_br(dev->priv); + } + rtnl_unlock(); + +} diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c new file 
mode 100644 index 000000000000..2b1cce46cab4 --- /dev/null +++ b/net/bridge/br_input.c @@ -0,0 +1,144 @@ +/* + * Handle incoming frames + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_input.c,v 1.10 2001/12/24 04:50:20 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/netfilter_bridge.h> +#include "br_private.h" + +const unsigned char bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; + +static int br_pass_frame_up_finish(struct sk_buff *skb) +{ +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif + netif_rx(skb); + + return 0; +} + +static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) +{ + struct net_device *indev; + + br->statistics.rx_packets++; + br->statistics.rx_bytes += skb->len; + + indev = skb->dev; + skb->dev = br->dev; + + NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, + br_pass_frame_up_finish); +} + +/* note: already called with rcu_read_lock (preempt_disabled) */ +int br_handle_frame_finish(struct sk_buff *skb) +{ + const unsigned char *dest = eth_hdr(skb)->h_dest; + struct net_bridge_port *p = skb->dev->br_port; + struct net_bridge *br = p->br; + struct net_bridge_fdb_entry *dst; + int passedup = 0; + + if (br->dev->flags & IFF_PROMISC) { + struct sk_buff *skb2; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 != NULL) { + passedup = 1; + br_pass_frame_up(br, skb2); + } + } + + if (dest[0] & 1) { + br_flood_forward(br, skb, !passedup); + if (!passedup) + br_pass_frame_up(br, skb); + goto out; + } + + dst = __br_fdb_get(br, dest); + if (dst != NULL && dst->is_local) { + if (!passedup) + br_pass_frame_up(br, skb); + else + 
kfree_skb(skb); + goto out; + } + + if (dst != NULL) { + br_forward(dst->dst, skb); + goto out; + } + + br_flood_forward(br, skb, 0); + +out: + return 0; +} + +/* + * Called via br_handle_frame_hook. + * Return 0 if *pskb should be processed furthur + * 1 if *pskb is handled + * note: already called with rcu_read_lock (preempt_disabled) + */ +int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + const unsigned char *dest = eth_hdr(skb)->h_dest; + + if (p->state == BR_STATE_DISABLED) + goto err; + + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) + goto err; + + if (p->state == BR_STATE_LEARNING || + p->state == BR_STATE_FORWARDING) + br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + + if (p->br->stp_enabled && + !memcmp(dest, bridge_ula, 5) && + !(dest[5] & 0xF0)) { + if (!dest[5]) { + NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, + NULL, br_stp_handle_bpdu); + return 1; + } + } + + else if (p->state == BR_STATE_FORWARDING) { + if (br_should_route_hook) { + if (br_should_route_hook(pskb)) + return 0; + skb = *pskb; + dest = eth_hdr(skb)->h_dest; + } + + if (!memcmp(p->br->dev->dev_addr, dest, ETH_ALEN)) + skb->pkt_type = PACKET_HOST; + + NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + br_handle_frame_finish); + return 1; + } + +err: + kfree_skb(skb); + return 1; +} diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c new file mode 100644 index 000000000000..b8ce14b22181 --- /dev/null +++ b/net/bridge/br_ioctl.c @@ -0,0 +1,410 @@ +/* + * Ioctl handler + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_ioctl.c,v 1.4 2000/11/08 05:16:40 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/if_bridge.h> +#include <linux/netdevice.h> +#include <linux/times.h> +#include <asm/uaccess.h> +#include "br_private.h" + +/* called with RTNL */ +static int get_bridge_ifindices(int *indices, int num) +{ + struct net_device *dev; + int i = 0; + + for (dev = dev_base; dev && i < num; dev = dev->next) { + if (dev->priv_flags & IFF_EBRIDGE) + indices[i++] = dev->ifindex; + } + + return i; +} + +/* called with RTNL */ +static void get_port_ifindices(struct net_bridge *br, int *ifindices, int num) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->port_no < num) + ifindices[p->port_no] = p->dev->ifindex; + } +} + +/* + * Format up to a page worth of forwarding table entries + * userbuf -- where to copy result + * maxnum -- maximum number of entries desired + * (limited to a page for sanity) + * offset -- number of records to skip + */ +static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, + unsigned long maxnum, unsigned long offset) +{ + int num; + void *buf; + size_t size = maxnum * sizeof(struct __fdb_entry); + + if (size > PAGE_SIZE) { + size = PAGE_SIZE; + maxnum = PAGE_SIZE/sizeof(struct __fdb_entry); + } + + buf = kmalloc(size, GFP_USER); + if (!buf) + return -ENOMEM; + + num = br_fdb_fillbuf(br, buf, maxnum, offset); + if (num > 0) { + if (copy_to_user(userbuf, buf, num*sizeof(struct __fdb_entry))) + num = -EFAULT; + } + kfree(buf); + + return num; +} + +static int add_del_if(struct net_bridge *br, int ifindex, int isadd) +{ + struct net_device *dev; + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + dev = dev_get_by_index(ifindex); + if (dev == NULL) + return -EINVAL; + + if (isadd) + ret = br_add_if(br, dev); + else + ret = br_del_if(br, dev); + + dev_put(dev); + return ret; +} + +/* + * Legacy ioctl's through SIOCDEVPRIVATE + * This interface is deprecated because it was too difficult to + * to do the translation for 32/64bit ioctl 
compatability. + */ +static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct net_bridge *br = netdev_priv(dev); + unsigned long args[4]; + + if (copy_from_user(args, rq->ifr_data, sizeof(args))) + return -EFAULT; + + switch (args[0]) { + case BRCTL_ADD_IF: + case BRCTL_DEL_IF: + return add_del_if(br, args[1], args[0] == BRCTL_ADD_IF); + + case BRCTL_GET_BRIDGE_INFO: + { + struct __bridge_info b; + + memset(&b, 0, sizeof(struct __bridge_info)); + rcu_read_lock(); + memcpy(&b.designated_root, &br->designated_root, 8); + memcpy(&b.bridge_id, &br->bridge_id, 8); + b.root_path_cost = br->root_path_cost; + b.max_age = jiffies_to_clock_t(br->max_age); + b.hello_time = jiffies_to_clock_t(br->hello_time); + b.forward_delay = br->forward_delay; + b.bridge_max_age = br->bridge_max_age; + b.bridge_hello_time = br->bridge_hello_time; + b.bridge_forward_delay = jiffies_to_clock_t(br->bridge_forward_delay); + b.topology_change = br->topology_change; + b.topology_change_detected = br->topology_change_detected; + b.root_port = br->root_port; + b.stp_enabled = br->stp_enabled; + b.ageing_time = jiffies_to_clock_t(br->ageing_time); + b.hello_timer_value = br_timer_value(&br->hello_timer); + b.tcn_timer_value = br_timer_value(&br->tcn_timer); + b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); + b.gc_timer_value = br_timer_value(&br->gc_timer); + rcu_read_unlock(); + + if (copy_to_user((void __user *)args[1], &b, sizeof(b))) + return -EFAULT; + + return 0; + } + + case BRCTL_GET_PORT_LIST: + { + int num, *indices; + + num = args[2]; + if (num < 0) + return -EINVAL; + if (num == 0) + num = 256; + if (num > BR_MAX_PORTS) + num = BR_MAX_PORTS; + + indices = kmalloc(num*sizeof(int), GFP_KERNEL); + if (indices == NULL) + return -ENOMEM; + + memset(indices, 0, num*sizeof(int)); + + get_port_ifindices(br, indices, num); + if (copy_to_user((void __user *)args[1], indices, num*sizeof(int))) + num = -EFAULT; + kfree(indices); + return 
num; + } + + case BRCTL_SET_BRIDGE_FORWARD_DELAY: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + spin_lock_bh(&br->lock); + br->bridge_forward_delay = clock_t_to_jiffies(args[1]); + if (br_is_root_bridge(br)) + br->forward_delay = br->bridge_forward_delay; + spin_unlock_bh(&br->lock); + return 0; + + case BRCTL_SET_BRIDGE_HELLO_TIME: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + spin_lock_bh(&br->lock); + br->bridge_hello_time = clock_t_to_jiffies(args[1]); + if (br_is_root_bridge(br)) + br->hello_time = br->bridge_hello_time; + spin_unlock_bh(&br->lock); + return 0; + + case BRCTL_SET_BRIDGE_MAX_AGE: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + spin_lock_bh(&br->lock); + br->bridge_max_age = clock_t_to_jiffies(args[1]); + if (br_is_root_bridge(br)) + br->max_age = br->bridge_max_age; + spin_unlock_bh(&br->lock); + return 0; + + case BRCTL_SET_AGEING_TIME: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + br->ageing_time = clock_t_to_jiffies(args[1]); + return 0; + + case BRCTL_GET_PORT_INFO: + { + struct __port_info p; + struct net_bridge_port *pt; + + rcu_read_lock(); + if ((pt = br_get_port(br, args[2])) == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + + memset(&p, 0, sizeof(struct __port_info)); + memcpy(&p.designated_root, &pt->designated_root, 8); + memcpy(&p.designated_bridge, &pt->designated_bridge, 8); + p.port_id = pt->port_id; + p.designated_port = pt->designated_port; + p.path_cost = pt->path_cost; + p.designated_cost = pt->designated_cost; + p.state = pt->state; + p.top_change_ack = pt->topology_change_ack; + p.config_pending = pt->config_pending; + p.message_age_timer_value = br_timer_value(&pt->message_age_timer); + p.forward_delay_timer_value = br_timer_value(&pt->forward_delay_timer); + p.hold_timer_value = br_timer_value(&pt->hold_timer); + + rcu_read_unlock(); + + if (copy_to_user((void __user *)args[1], &p, sizeof(p))) + return -EFAULT; + + return 0; + } + + case BRCTL_SET_BRIDGE_STP_STATE: + if 
(!capable(CAP_NET_ADMIN)) + return -EPERM; + + br->stp_enabled = args[1]?1:0; + return 0; + + case BRCTL_SET_BRIDGE_PRIORITY: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + spin_lock_bh(&br->lock); + br_stp_set_bridge_priority(br, args[1]); + spin_unlock_bh(&br->lock); + return 0; + + case BRCTL_SET_PORT_PRIORITY: + { + struct net_bridge_port *p; + int ret = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (args[2] >= (1<<(16-BR_PORT_BITS))) + return -ERANGE; + + spin_lock_bh(&br->lock); + if ((p = br_get_port(br, args[1])) == NULL) + ret = -EINVAL; + else + br_stp_set_port_priority(p, args[2]); + spin_unlock_bh(&br->lock); + return ret; + } + + case BRCTL_SET_PATH_COST: + { + struct net_bridge_port *p; + int ret = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + spin_lock_bh(&br->lock); + if ((p = br_get_port(br, args[1])) == NULL) + ret = -EINVAL; + else + br_stp_set_path_cost(p, args[2]); + spin_unlock_bh(&br->lock); + return ret; + } + + case BRCTL_GET_FDB_ENTRIES: + return get_fdb_entries(br, (void __user *)args[1], + args[2], args[3]); + } + + return -EOPNOTSUPP; +} + +static int old_deviceless(void __user *uarg) +{ + unsigned long args[3]; + + if (copy_from_user(args, uarg, sizeof(args))) + return -EFAULT; + + switch (args[0]) { + case BRCTL_GET_VERSION: + return BRCTL_VERSION; + + case BRCTL_GET_BRIDGES: + { + int *indices; + int ret = 0; + + if (args[2] >= 2048) + return -ENOMEM; + indices = kmalloc(args[2]*sizeof(int), GFP_KERNEL); + if (indices == NULL) + return -ENOMEM; + + memset(indices, 0, args[2]*sizeof(int)); + args[2] = get_bridge_ifindices(indices, args[2]); + + ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int)) + ? 
-EFAULT : args[2]; + + kfree(indices); + return ret; + } + + case BRCTL_ADD_BRIDGE: + case BRCTL_DEL_BRIDGE: + { + char buf[IFNAMSIZ]; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ)) + return -EFAULT; + + buf[IFNAMSIZ-1] = 0; + + if (args[0] == BRCTL_ADD_BRIDGE) + return br_add_bridge(buf); + + return br_del_bridge(buf); + } + } + + return -EOPNOTSUPP; +} + +int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +{ + switch (cmd) { + case SIOCGIFBR: + case SIOCSIFBR: + return old_deviceless(uarg); + + case SIOCBRADDBR: + case SIOCBRDELBR: + { + char buf[IFNAMSIZ]; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(buf, uarg, IFNAMSIZ)) + return -EFAULT; + + buf[IFNAMSIZ-1] = 0; + if (cmd == SIOCBRADDBR) + return br_add_bridge(buf); + + return br_del_bridge(buf); + } + } + return -EOPNOTSUPP; +} + +int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct net_bridge *br = netdev_priv(dev); + + switch(cmd) { + case SIOCDEVPRIVATE: + return old_dev_ioctl(dev, rq, cmd); + + case SIOCBRADDIF: + case SIOCBRDELIF: + return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF); + + } + + pr_debug("Bridge does not support ioctl 0x%x\n", cmd); + return -EOPNOTSUPP; +} diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c new file mode 100644 index 000000000000..be03d3ad2648 --- /dev/null +++ b/net/bridge/br_netfilter.c @@ -0,0 +1,1087 @@ +/* + * Handle firewalling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * Bart De Schuymer (maintainer) <bdschuym@pandora.be> + * + * Changes: + * Apr 29 2003: physdev module support (bdschuym) + * Jun 19 2003: let arptables see bridged ARP traffic (bdschuym) + * Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge + * (bdschuym) + * Sep 01 2004: add IPv6 filtering (bdschuym) + * + * This program is free software; you can redistribute it and/or + * modify 
it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Lennert dedicates this file to Kerstin Wurdinger. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter_arp.h> +#include <linux/in_route.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <asm/uaccess.h> +#include <asm/checksum.h> +#include "br_private.h" +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +#define skb_origaddr(skb) (((struct bridge_skb_cb *) \ + (skb->nf_bridge->data))->daddr.ipv4) +#define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr) +#define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr) + +#define has_bridge_parent(device) ((device)->br_port != NULL) +#define bridge_parent(device) ((device)->br_port->br->dev) + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *brnf_sysctl_header; +static int brnf_call_iptables = 1; +static int brnf_call_ip6tables = 1; +static int brnf_call_arptables = 1; +static int brnf_filter_vlan_tagged = 1; +#else +#define brnf_filter_vlan_tagged 1 +#endif + +#define IS_VLAN_IP (skb->protocol == __constant_htons(ETH_P_8021Q) && \ + hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_IP) && \ + brnf_filter_vlan_tagged) +#define IS_VLAN_IPV6 (skb->protocol == __constant_htons(ETH_P_8021Q) && \ + hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_IPV6) && \ + brnf_filter_vlan_tagged) +#define IS_VLAN_ARP (skb->protocol == __constant_htons(ETH_P_8021Q) && \ + hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_ARP) && \ + brnf_filter_vlan_tagged) + +/* We need these fake structures to make netfilter 
happy -- + * lots of places assume that skb->dst != NULL, which isn't + * all that unreasonable. + * + * Currently, we fill in the PMTU entry because netfilter + * refragmentation needs it, and the rt_flags entry because + * ipt_REJECT needs it. Future netfilter modules might + * require us to fill additional fields. */ +static struct net_device __fake_net_device = { + .hard_header_len = ETH_HLEN +}; + +static struct rtable __fake_rtable = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .dev = &__fake_net_device, + .path = &__fake_rtable.u.dst, + .metrics = {[RTAX_MTU - 1] = 1500}, + } + }, + .rt_flags = 0, +}; + + +/* PF_BRIDGE/PRE_ROUTING *********************************************/ +/* Undo the changes made for ip6tables PREROUTING and continue the + * bridge PRE_ROUTING hook. */ +static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING); +#endif + + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; + + skb->dst = (struct dst_entry *)&__fake_rtable; + dst_hold(skb->dst); + + skb->dev = nf_bridge->physindev; + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + br_handle_frame_finish, 1); + + return 0; +} + +static void __br_dnat_complain(void) +{ + static unsigned long last_complaint; + + if (jiffies - last_complaint >= 5 * HZ) { + printk(KERN_WARNING "Performing cross-bridge DNAT requires IP " + "forwarding to be enabled\n"); + last_complaint = jiffies; + } +} + +/* This requires some explaining. If DNAT has taken place, + * we will need to fix up the destination Ethernet address, + * and this is a tricky process. + * + * There are two cases to consider: + * 1. 
The packet was DNAT'ed to a device in the same bridge + * port group as it was received on. We can still bridge + * the packet. + * 2. The packet was DNAT'ed to a different device, either + * a non-bridged device or another bridge port group. + * The packet will need to be routed. + * + * The correct way of distinguishing between these two cases is to + * call ip_route_input() and to look at skb->dst->dev, which is + * changed to the destination device if ip_route_input() succeeds. + * + * Let us first consider the case that ip_route_input() succeeds: + * + * If skb->dst->dev equals the logical bridge device the packet + * came in on, we can consider this bridging. We then call + * skb->dst->output() which will make the packet enter br_nf_local_out() + * not much later. In that function it is assured that the iptables + * FORWARD chain is traversed for the packet. + * + * Otherwise, the packet is considered to be routed and we just + * change the destination MAC address so that the packet will + * later be passed up to the IP stack to be routed. + * + * Let us now consider the case that ip_route_input() fails: + * + * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input() + * will fail, while __ip_route_output_key() will return success. The source + * address for __ip_route_output_key() is set to zero, so __ip_route_output_key + * thinks we're handling a locally generated packet and won't care + * if IP forwarding is allowed. We send a warning message to the users's + * log telling her to put IP forwarding on. + * + * ip_route_input() will also fail if there is no route available. + * In that case we just drop the packet. 
+ * + * --Lennert, 20020411 + * --Bart, 20020416 (updated) + * --Bart, 20021007 (updated) */ +static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) +{ +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug |= (1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_FORWARD); +#endif + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + skb->nf_bridge->mask |= BRNF_PKT_TYPE; + } + skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; + + skb->dev = bridge_parent(skb->dev); + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; + } + skb->dst->output(skb); + return 0; +} + +static int br_nf_pre_routing_finish(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct iphdr *iph = skb->nh.iph; + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING); +#endif + + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; + + if (dnat_took_place(skb)) { + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, + dev)) { + struct rtable *rt; + struct flowi fl = { .nl_u = + { .ip4_u = { .daddr = iph->daddr, .saddr = 0 , + .tos = RT_TOS(iph->tos)} }, .proto = 0}; + + if (!ip_route_output_key(&rt, &fl)) { + /* Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. 
*/ + if (((struct dst_entry *)rt)->dev == dev) { + skb->dst = (struct dst_entry *)rt; + goto bridged_dnat; + } + __br_dnat_complain(); + dst_release((struct dst_entry *)rt); + } + kfree_skb(skb); + return 0; + } else { + if (skb->dst->dev == dev) { +bridged_dnat: + /* Tell br_nf_local_out this is a + * bridged frame */ + nf_bridge->mask |= BRNF_BRIDGED_DNAT; + skb->dev = nf_bridge->physindev; + if (skb->protocol == + __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, + skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, + ETH_ALEN); + skb->pkt_type = PACKET_HOST; + } + } else { + skb->dst = (struct dst_entry *)&__fake_rtable; + dst_hold(skb->dst); + } + + skb->dev = nf_bridge->physindev; + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + br_handle_frame_finish, 1); + + return 0; +} + +/* Some common code for IPv4/IPv6 */ +static void setup_pre_routing(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->mask |= BRNF_PKT_TYPE; + } + + nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; + nf_bridge->physindev = skb->dev; + skb->dev = bridge_parent(skb->dev); +} + +/* We only check the length. 
A bridge shouldn't do any hop-by-hop stuff anyway */ +static int check_hbh_len(struct sk_buff *skb) +{ + unsigned char *raw = (u8*)(skb->nh.ipv6h+1); + u32 pkt_len; + int off = raw - skb->nh.raw; + int len = (raw[1]+1)<<3; + + if ((raw + len) - skb->data > skb_headlen(skb)) + goto bad; + + off += 2; + len -= 2; + + while (len > 0) { + int optlen = raw[off+1]+2; + + switch (skb->nh.raw[off]) { + case IPV6_TLV_PAD0: + optlen = 1; + break; + + case IPV6_TLV_PADN: + break; + + case IPV6_TLV_JUMBO: + if (skb->nh.raw[off+1] != 4 || (off&3) != 2) + goto bad; + + pkt_len = ntohl(*(u32*)(skb->nh.raw+off+2)); + + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + goto bad; + if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { + if (__pskb_trim(skb, + pkt_len + sizeof(struct ipv6hdr))) + goto bad; + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; + } + break; + default: + if (optlen > len) + goto bad; + break; + } + off += optlen; + len -= optlen; + } + if (len == 0) + return 0; +bad: + return -1; + +} + +/* Replicate the checks that IPv6 does on packet reception and pass the packet + * to ip6tables, which doesn't support NAT, so things are fairly simple. 
*/ +static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, + struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, int (*okfn)(struct sk_buff *)) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + struct nf_bridge_info *nf_bridge; + + if (skb->len < sizeof(struct ipv6hdr)) + goto inhdr_error; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto inhdr_error; + + hdr = skb->nh.ipv6h; + + if (hdr->version != 6) + goto inhdr_error; + + pkt_len = ntohs(hdr->payload_len); + + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + goto inhdr_error; + if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { + if (__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr))) + goto inhdr_error; + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; + } + } + if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) + goto inhdr_error; + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_IP6_PRE_ROUTING); +#endif + if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) + return NF_DROP; + setup_pre_routing(skb); + + NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, + br_nf_pre_routing_finish_ipv6); + + return NF_STOLEN; + +inhdr_error: + return NF_DROP; +} + +/* Direct IPv6 traffic to br_nf_pre_routing_ipv6. + * Replicate the checks that IPv4 does on packet reception. + * Set skb->dev to the bridge device (i.e. parent of the + * receiving device) to make netfilter happy, the REDIRECT + * target in particular. Save the original destination IP + * address to be able to detect DNAT afterwards. 
*/ +static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct iphdr *iph; + __u32 len; + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge; + struct vlan_ethhdr *hdr = vlan_eth_hdr(*pskb); + + if (skb->protocol == __constant_htons(ETH_P_IPV6) || IS_VLAN_IPV6) { +#ifdef CONFIG_SYSCTL + if (!brnf_call_ip6tables) + return NF_ACCEPT; +#endif + if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) + goto out; + + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + (skb)->nh.raw += VLAN_HLEN; + } + return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); + } +#ifdef CONFIG_SYSCTL + if (!brnf_call_iptables) + return NF_ACCEPT; +#endif + + if (skb->protocol != __constant_htons(ETH_P_IP) && !IS_VLAN_IP) + return NF_ACCEPT; + + if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) + goto out; + + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + (skb)->nh.raw += VLAN_HLEN; + } + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto inhdr_error; + + iph = skb->nh.iph; + if (iph->ihl < 5 || iph->version != 4) + goto inhdr_error; + + if (!pskb_may_pull(skb, 4*iph->ihl)) + goto inhdr_error; + + iph = skb->nh.iph; + if (ip_fast_csum((__u8 *)iph, iph->ihl) != 0) + goto inhdr_error; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < 4*iph->ihl) + goto inhdr_error; + + if (skb->len > len) { + __pskb_trim(skb, len); + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; + } + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_IP_PRE_ROUTING); +#endif + if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) + return NF_DROP; + setup_pre_routing(skb); + store_orig_dstaddr(skb); + + NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, + br_nf_pre_routing_finish); + + return NF_STOLEN; + +inhdr_error: +// IP_INC_STATS_BH(IpInHdrErrors); +out: + 
return NF_DROP; +} + + +/* PF_BRIDGE/LOCAL_IN ************************************************/ +/* The packet is locally destined, which requires a real + * dst_entry, so detach the fake one. On the way up, the + * packet would pass through PRE_ROUTING again (which already + * took place when the packet entered the bridge), but we + * register an IPv4 PRE_ROUTING 'sabotage' hook that will + * prevent this from happening. */ +static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + + if (skb->dst == (struct dst_entry *)&__fake_rtable) { + dst_release(skb->dst); + skb->dst = NULL; + } + + return NF_ACCEPT; +} + + +/* PF_BRIDGE/FORWARD *************************************************/ +static int br_nf_forward_finish(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct net_device *in; + struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_BR_FORWARD); +#endif + + if (skb->protocol != __constant_htons(ETH_P_ARP) && !IS_VLAN_ARP) { + in = nf_bridge->physindev; + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + } else { + in = *((struct net_device **)(skb->cb)); + } + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in, + skb->dev, br_forward_finish, 1); + return 0; +} + +/* This is the 'purely bridged' case. For IP, we pass the packet to + * netfilter with indev and outdev set to the bridge device, + * but we are still able to filter on the 'real' indev/outdev + * because of the physdev module. For ARP, indev and outdev are the + * bridge ports. 
*/ +static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge; + struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + int pf; + + if (!skb->nf_bridge) + return NF_ACCEPT; + + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) + pf = PF_INET; + else + pf = PF_INET6; + + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(*pskb, VLAN_HLEN); + (*pskb)->nh.raw += VLAN_HLEN; + } + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_BR_FORWARD); +#endif + nf_bridge = skb->nf_bridge; + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->mask |= BRNF_PKT_TYPE; + } + + /* The physdev module checks on this */ + nf_bridge->mask |= BRNF_BRIDGED; + nf_bridge->physoutdev = skb->dev; + + NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), + bridge_parent(out), br_nf_forward_finish); + + return NF_STOLEN; +} + +static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + struct net_device **d = (struct net_device **)(skb->cb); + +#ifdef CONFIG_SYSCTL + if (!brnf_call_arptables) + return NF_ACCEPT; +#endif + + if (skb->protocol != __constant_htons(ETH_P_ARP)) { + if (!IS_VLAN_ARP) + return NF_ACCEPT; + skb_pull(*pskb, VLAN_HLEN); + (*pskb)->nh.raw += VLAN_HLEN; + } + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_BR_FORWARD); +#endif + + if (skb->nh.arph->ar_pln != 4) { + if (IS_VLAN_ARP) { + skb_push(*pskb, VLAN_HLEN); + (*pskb)->nh.raw -= VLAN_HLEN; + } + return NF_ACCEPT; + } + *d = (struct net_device *)in; + NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, + (struct net_device *)out, br_nf_forward_finish); + + return 
NF_STOLEN; +} + + +/* PF_BRIDGE/LOCAL_OUT ***********************************************/ +static int br_nf_local_out_finish(struct sk_buff *skb) +{ +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug &= ~(1 << NF_BR_LOCAL_OUT); +#endif + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + br_forward_finish, NF_BR_PRI_FIRST + 1); + + return 0; +} + +/* This function sees both locally originated IP packets and forwarded + * IP packets (in both cases the destination device is a bridge + * device). It also sees bridged-and-DNAT'ed packets. + * To be able to filter on the physical bridge devices (with the physdev + * module), we steal packets destined to a bridge device away from the + * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later, + * when we have determined the real output device. This is done in here. + * + * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged + * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() + * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority + * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor + * will be executed. + * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched + * this packet before, and so the packet was locally originated. We fake + * the PF_INET/LOCAL_OUT hook. + * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed, + * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure + * even routed packets that didn't arrive on a bridge interface have their + * nf_bridge->physindev set. 
*/ +static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct net_device *realindev, *realoutdev; + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge; + struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + int pf; + + if (!skb->nf_bridge) + return NF_ACCEPT; + + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) + pf = PF_INET; + else + pf = PF_INET6; + +#ifdef CONFIG_NETFILTER_DEBUG + /* Sometimes we get packets with NULL ->dst here (for example, + * running a dhcp client daemon triggers this). This should now + * be fixed, but let's keep the check around. */ + if (skb->dst == NULL) { + printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); + return NF_ACCEPT; + } +#endif + + nf_bridge = skb->nf_bridge; + nf_bridge->physoutdev = skb->dev; + realindev = nf_bridge->physindev; + + /* Bridged, take PF_BRIDGE/FORWARD. + * (see big note in front of br_nf_pre_routing_finish) */ + if (nf_bridge->mask & BRNF_BRIDGED_DNAT) { + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + + NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, + skb->dev, br_forward_finish); + goto out; + } + realoutdev = bridge_parent(skb->dev); + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + /* iptables should match -o br0.x */ + if (nf_bridge->netoutdev) + realoutdev = nf_bridge->netoutdev; +#endif + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + (*pskb)->nh.raw += VLAN_HLEN; + } + /* IP forwarded traffic has a physindev, locally + * generated traffic hasn't. 
*/ + if (realindev != NULL) { + if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT) && + has_bridge_parent(realindev)) + realindev = bridge_parent(realindev); + + NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, + realoutdev, br_nf_local_out_finish, + NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1); + } else { +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_IP_LOCAL_OUT); +#endif + + NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev, + realoutdev, br_nf_local_out_finish, + NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1); + } + +out: + return NF_STOLEN; +} + + +/* PF_BRIDGE/POST_ROUTING ********************************************/ +static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge; + struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + struct net_device *realoutdev = bridge_parent(skb->dev); + int pf; + +#ifdef CONFIG_NETFILTER_DEBUG + /* Be very paranoid. This probably won't happen anymore, but let's + * keep the check just to be sure... */ + if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { + printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " + "bad mac.raw pointer."); + goto print_error; + } +#endif + + if (!nf_bridge) + return NF_ACCEPT; + + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) + pf = PF_INET; + else + pf = PF_INET6; + +#ifdef CONFIG_NETFILTER_DEBUG + if (skb->dst == NULL) { + printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); + goto print_error; + } + + skb->nf_debug ^= (1 << NF_IP_POST_ROUTING); +#endif + + /* We assume any code from br_dev_queue_push_xmit onwards doesn't care + * about the value of skb->pkt_type. 
*/ + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->mask |= BRNF_PKT_TYPE; + } + + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; + } + + nf_bridge_save_header(skb); + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + if (nf_bridge->netoutdev) + realoutdev = nf_bridge->netoutdev; +#endif + NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev, + br_dev_queue_push_xmit); + + return NF_STOLEN; + +#ifdef CONFIG_NETFILTER_DEBUG +print_error: + if (skb->dev != NULL) { + printk("[%s]", skb->dev->name); + if (has_bridge_parent(skb->dev)) + printk("[%s]", bridge_parent(skb->dev)->name); + } + printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, + skb->data); + return NF_ACCEPT; +#endif +} + + +/* IP/SABOTAGE *****************************************************/ +/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING + * for the second time. */ +static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if ((*pskb)->nf_bridge && + !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + return NF_STOP; + } + + return NF_ACCEPT; +} + +/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT + * and PF_INET(6)/POST_ROUTING until we have done the forwarding + * decision in the bridge code and have determined nf_bridge->physoutdev. 
*/ +static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + + if ((out->hard_start_xmit == br_dev_xmit && + okfn != br_nf_forward_finish && + okfn != br_nf_local_out_finish && + okfn != br_dev_queue_push_xmit) +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + || ((out->priv_flags & IFF_802_1Q_VLAN) && + VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) +#endif + ) { + struct nf_bridge_info *nf_bridge; + + if (!skb->nf_bridge) { +#ifdef CONFIG_SYSCTL + /* This code is executed while in the IP(v6) stack, + the version should be 4 or 6. We can't use + skb->protocol because that isn't set on + PF_INET(6)/LOCAL_OUT. */ + struct iphdr *ip = skb->nh.iph; + + if (ip->version == 4 && !brnf_call_iptables) + return NF_ACCEPT; + else if (ip->version == 6 && !brnf_call_ip6tables) + return NF_ACCEPT; +#endif + if (hook == NF_IP_POST_ROUTING) + return NF_ACCEPT; + if (!nf_bridge_alloc(skb)) + return NF_DROP; + } + + nf_bridge = skb->nf_bridge; + + /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we + * will need the indev then. For a brouter, the real indev + * can be a bridge port, so we make sure br_nf_local_out() + * doesn't use the bridge parent of the indev by using + * the BRNF_DONT_TAKE_PARENT mask. */ + if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) { + nf_bridge->mask &= BRNF_DONT_TAKE_PARENT; + nf_bridge->physindev = (struct net_device *)in; + } +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + /* the iptables outdev is br0.x, not br0 */ + if (out->priv_flags & IFF_802_1Q_VLAN) + nf_bridge->netoutdev = (struct net_device *)out; +#endif + return NF_STOP; + } + + return NF_ACCEPT; +} + +/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent + * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. 
+ * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because + * ip_refrag() can return NF_STOLEN. */ +static struct nf_hook_ops br_nf_ops[] = { + { .hook = br_nf_pre_routing, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_BRNF, }, + { .hook = br_nf_local_in, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_BRNF, }, + { .hook = br_nf_forward_ip, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF - 1, }, + { .hook = br_nf_forward_arp, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF, }, + { .hook = br_nf_local_out, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_LOCAL_OUT, + .priority = NF_BR_PRI_FIRST, }, + { .hook = br_nf_post_routing, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_BR_PRI_LAST, }, + { .hook = ip_sabotage_in, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_FIRST, }, + { .hook = ip_sabotage_in, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_FIRST, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_FORWARD, + .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_FORWARD, + .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, + { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_FIRST, }, 
+ { .hook = ip_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_POST_ROUTING, + .priority = NF_IP6_PRI_FIRST, }, +}; + +#ifdef CONFIG_SYSCTL +static +int brnf_sysctl_call_tables(ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + + if (write && *(int *)(ctl->data)) + *(int *)(ctl->data) = 1; + return ret; +} + +static ctl_table brnf_table[] = { + { + .ctl_name = NET_BRIDGE_NF_CALL_ARPTABLES, + .procname = "bridge-nf-call-arptables", + .data = &brnf_call_arptables, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &brnf_sysctl_call_tables, + }, + { + .ctl_name = NET_BRIDGE_NF_CALL_IPTABLES, + .procname = "bridge-nf-call-iptables", + .data = &brnf_call_iptables, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &brnf_sysctl_call_tables, + }, + { + .ctl_name = NET_BRIDGE_NF_CALL_IP6TABLES, + .procname = "bridge-nf-call-ip6tables", + .data = &brnf_call_ip6tables, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &brnf_sysctl_call_tables, + }, + { + .ctl_name = NET_BRIDGE_NF_FILTER_VLAN_TAGGED, + .procname = "bridge-nf-filter-vlan-tagged", + .data = &brnf_filter_vlan_tagged, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &brnf_sysctl_call_tables, + }, + { .ctl_name = 0 } +}; + +static ctl_table brnf_bridge_table[] = { + { + .ctl_name = NET_BRIDGE, + .procname = "bridge", + .mode = 0555, + .child = brnf_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table brnf_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = brnf_bridge_table, + }, + { .ctl_name = 0 } +}; +#endif + +int br_netfilter_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) { + int ret; + + if ((ret = nf_register_hook(&br_nf_ops[i])) >= 0) + continue; + + while (i--) + nf_unregister_hook(&br_nf_ops[i]); + + return ret; + } + +#ifdef CONFIG_SYSCTL + 
brnf_sysctl_header = register_sysctl_table(brnf_net_table, 0); + if (brnf_sysctl_header == NULL) { + printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); + for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) + nf_unregister_hook(&br_nf_ops[i]); + return -EFAULT; + } +#endif + + printk(KERN_NOTICE "Bridge firewalling registered\n"); + + return 0; +} + +void br_netfilter_fini(void) +{ + int i; + + for (i = ARRAY_SIZE(br_nf_ops) - 1; i >= 0; i--) + nf_unregister_hook(&br_nf_ops[i]); +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(brnf_sysctl_header); +#endif +} diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c new file mode 100644 index 000000000000..f8fb49e34764 --- /dev/null +++ b/net/bridge/br_notify.c @@ -0,0 +1,87 @@ +/* + * Device event handling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_notify.c,v 1.2 2000/02/21 15:51:34 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> + +#include "br_private.h" + +static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr); + +struct notifier_block br_device_notifier = { + .notifier_call = br_device_event +}; + +/* + * Handle changes in state of network devices enslaved to a bridge. + * + * Note: don't care about up/down if bridge itself is down, because + * port state is checked when bridge is brought up. 
+ */ +static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct net_bridge_port *p = dev->br_port; + struct net_bridge *br; + + /* not a port of a bridge */ + if (p == NULL) + return NOTIFY_DONE; + + br = p->br; + + spin_lock_bh(&br->lock); + switch (event) { + case NETDEV_CHANGEMTU: + dev_set_mtu(br->dev, br_min_mtu(br)); + break; + + case NETDEV_CHANGEADDR: + br_fdb_changeaddr(p, dev->dev_addr); + br_stp_recalculate_bridge_id(br); + break; + + case NETDEV_CHANGE: /* device is up but carrier changed */ + if (!(br->dev->flags & IFF_UP)) + break; + + if (netif_carrier_ok(dev)) { + if (p->state == BR_STATE_DISABLED) + br_stp_enable_port(p); + } else { + if (p->state != BR_STATE_DISABLED) + br_stp_disable_port(p); + } + break; + + case NETDEV_DOWN: + if (br->dev->flags & IFF_UP) + br_stp_disable_port(p); + break; + + case NETDEV_UP: + if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) + br_stp_enable_port(p); + break; + + case NETDEV_UNREGISTER: + spin_unlock_bh(&br->lock); + br_del_if(br, dev); + goto done; + } + spin_unlock_bh(&br->lock); + + done: + return NOTIFY_DONE; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h new file mode 100644 index 000000000000..54d63f1372a0 --- /dev/null +++ b/net/bridge/br_private.h @@ -0,0 +1,244 @@ +/* + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_private.h,v 1.7 2001/12/24 00:59:55 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _BR_PRIVATE_H +#define _BR_PRIVATE_H + +#include <linux/netdevice.h> +#include <linux/miscdevice.h> +#include <linux/if_bridge.h> + +#define BR_HASH_BITS 8 +#define BR_HASH_SIZE (1 << BR_HASH_BITS) + +#define BR_HOLD_TIME (1*HZ) + +#define BR_PORT_BITS 10 +#define BR_MAX_PORTS (1<<BR_PORT_BITS) + +typedef struct bridge_id bridge_id; +typedef struct mac_addr mac_addr; +typedef __u16 port_id; + +struct bridge_id +{ + unsigned char prio[2]; + unsigned char addr[6]; +}; + +struct mac_addr +{ + unsigned char addr[6]; +}; + +struct net_bridge_fdb_entry +{ + struct hlist_node hlist; + struct net_bridge_port *dst; + + struct rcu_head rcu; + atomic_t use_count; + unsigned long ageing_timer; + mac_addr addr; + unsigned char is_local; + unsigned char is_static; +}; + +struct net_bridge_port +{ + struct net_bridge *br; + struct net_device *dev; + struct list_head list; + + /* STP */ + u8 priority; + u8 state; + u16 port_no; + unsigned char topology_change_ack; + unsigned char config_pending; + port_id port_id; + port_id designated_port; + bridge_id designated_root; + bridge_id designated_bridge; + u32 path_cost; + u32 designated_cost; + + struct timer_list forward_delay_timer; + struct timer_list hold_timer; + struct timer_list message_age_timer; + struct kobject kobj; + struct rcu_head rcu; +}; + +struct net_bridge +{ + spinlock_t lock; + struct list_head port_list; + struct net_device *dev; + struct net_device_stats statistics; + spinlock_t hash_lock; + struct hlist_head hash[BR_HASH_SIZE]; + struct list_head age_list; + + /* STP */ + bridge_id designated_root; + bridge_id bridge_id; + u32 root_path_cost; + unsigned long max_age; + unsigned long hello_time; + unsigned long forward_delay; + unsigned long bridge_max_age; + unsigned long ageing_time; + unsigned long bridge_hello_time; + unsigned long bridge_forward_delay; + + u16 root_port; + unsigned char stp_enabled; + unsigned char topology_change; + unsigned char topology_change_detected; + + struct 
timer_list hello_timer; + struct timer_list tcn_timer; + struct timer_list topology_change_timer; + struct timer_list gc_timer; + struct kobject ifobj; +}; + +extern struct notifier_block br_device_notifier; +extern const unsigned char bridge_ula[6]; + +/* called under bridge lock */ +static inline int br_is_root_bridge(const struct net_bridge *br) +{ + return !memcmp(&br->bridge_id, &br->designated_root, 8); +} + + +/* br_device.c */ +extern void br_dev_setup(struct net_device *dev); +extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev); + +/* br_fdb.c */ +extern void br_fdb_init(void); +extern void br_fdb_fini(void); +extern void br_fdb_changeaddr(struct net_bridge_port *p, + const unsigned char *newaddr); +extern void br_fdb_cleanup(unsigned long arg); +extern void br_fdb_delete_by_port(struct net_bridge *br, + struct net_bridge_port *p); +extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, + const unsigned char *addr); +extern struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br, + unsigned char *addr); +extern void br_fdb_put(struct net_bridge_fdb_entry *ent); +extern int br_fdb_fillbuf(struct net_bridge *br, void *buf, + unsigned long count, unsigned long off); +extern int br_fdb_insert(struct net_bridge *br, + struct net_bridge_port *source, + const unsigned char *addr); +extern void br_fdb_update(struct net_bridge *br, + struct net_bridge_port *source, + const unsigned char *addr); + +/* br_forward.c */ +extern void br_deliver(const struct net_bridge_port *to, + struct sk_buff *skb); +extern int br_dev_queue_push_xmit(struct sk_buff *skb); +extern void br_forward(const struct net_bridge_port *to, + struct sk_buff *skb); +extern int br_forward_finish(struct sk_buff *skb); +extern void br_flood_deliver(struct net_bridge *br, + struct sk_buff *skb, + int clone); +extern void br_flood_forward(struct net_bridge *br, + struct sk_buff *skb, + int clone); + +/* br_if.c */ +extern int br_add_bridge(const char *name); 
+extern int br_del_bridge(const char *name); +extern void br_cleanup_bridges(void); +extern int br_add_if(struct net_bridge *br, + struct net_device *dev); +extern int br_del_if(struct net_bridge *br, + struct net_device *dev); +extern int br_min_mtu(const struct net_bridge *br); + +/* br_input.c */ +extern int br_handle_frame_finish(struct sk_buff *skb); +extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb); + +/* br_ioctl.c */ +extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); + +/* br_netfilter.c */ +extern int br_netfilter_init(void); +extern void br_netfilter_fini(void); + +/* br_stp.c */ +extern void br_log_state(const struct net_bridge_port *p); +extern struct net_bridge_port *br_get_port(struct net_bridge *br, + u16 port_no); +extern void br_init_port(struct net_bridge_port *p); +extern void br_become_designated_port(struct net_bridge_port *p); + +/* br_stp_if.c */ +extern void br_stp_enable_bridge(struct net_bridge *br); +extern void br_stp_disable_bridge(struct net_bridge *br); +extern void br_stp_enable_port(struct net_bridge_port *p); +extern void br_stp_disable_port(struct net_bridge_port *p); +extern void br_stp_recalculate_bridge_id(struct net_bridge *br); +extern void br_stp_set_bridge_priority(struct net_bridge *br, + u16 newprio); +extern void br_stp_set_port_priority(struct net_bridge_port *p, + u8 newprio); +extern void br_stp_set_path_cost(struct net_bridge_port *p, + u32 path_cost); +extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); + +/* br_stp_bpdu.c */ +extern int br_stp_handle_bpdu(struct sk_buff *skb); + +/* br_stp_timer.c */ +extern void br_stp_timer_init(struct net_bridge *br); +extern void br_stp_port_timer_init(struct net_bridge_port *p); +extern unsigned long br_timer_value(const struct timer_list *timer); + +/* br.c */ +extern struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct 
net_bridge *br, + unsigned char *addr); +extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); + + +#ifdef CONFIG_SYSFS +/* br_sysfs_if.c */ +extern int br_sysfs_addif(struct net_bridge_port *p); +extern void br_sysfs_removeif(struct net_bridge_port *p); +extern void br_sysfs_freeif(struct net_bridge_port *p); + +/* br_sysfs_br.c */ +extern int br_sysfs_addbr(struct net_device *dev); +extern void br_sysfs_delbr(struct net_device *dev); + +#else + +#define br_sysfs_addif(p) (0) +#define br_sysfs_removeif(p) do { } while(0) +#define br_sysfs_freeif(p) kfree(p) +#define br_sysfs_addbr(dev) (0) +#define br_sysfs_delbr(dev) do { } while(0) +#endif /* CONFIG_SYSFS */ + +#endif diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h new file mode 100644 index 000000000000..e29f01ac1adf --- /dev/null +++ b/net/bridge/br_private_stp.h @@ -0,0 +1,58 @@ +/* + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_private_stp.h,v 1.3 2001/02/05 06:03:47 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _BR_PRIVATE_STP_H +#define _BR_PRIVATE_STP_H + +#define BPDU_TYPE_CONFIG 0 +#define BPDU_TYPE_TCN 0x80 + +struct br_config_bpdu +{ + unsigned topology_change:1; + unsigned topology_change_ack:1; + bridge_id root; + int root_path_cost; + bridge_id bridge_id; + port_id port_id; + int message_age; + int max_age; + int hello_time; + int forward_delay; +}; + +/* called under bridge lock */ +static inline int br_is_designated_port(const struct net_bridge_port *p) +{ + return !memcmp(&p->designated_bridge, &p->br->bridge_id, 8) && + (p->designated_port == p->port_id); +} + + +/* br_stp.c */ +extern void br_become_root_bridge(struct net_bridge *br); +extern void br_config_bpdu_generation(struct net_bridge *); +extern void br_configuration_update(struct net_bridge *); +extern void br_port_state_selection(struct net_bridge *); +extern void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu); +extern void br_received_tcn_bpdu(struct net_bridge_port *p); +extern void br_transmit_config(struct net_bridge_port *p); +extern void br_transmit_tcn(struct net_bridge *br); +extern void br_topology_change_detection(struct net_bridge *br); + +/* br_stp_bpdu.c */ +extern void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); +extern void br_send_tcn_bpdu(struct net_bridge_port *); + +#endif diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c new file mode 100644 index 000000000000..04ca0639a95a --- /dev/null +++ b/net/bridge/br_stp.c @@ -0,0 +1,459 @@ +/* + * Spanning tree protocol; generic parts + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_stp.c,v 1.4 2000/06/19 10:13:35 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <linux/kernel.h> +#include <linux/smp_lock.h> + +#include "br_private.h" +#include "br_private_stp.h" + +/* since time values in bpdu are in jiffies and then scaled (1/256) + * before sending, make sure that is at least one. + */ +#define MESSAGE_AGE_INCR ((HZ < 256) ? 1 : (HZ/256)) + +static const char *br_port_state_names[] = { + [BR_STATE_DISABLED] = "disabled", + [BR_STATE_LISTENING] = "listening", + [BR_STATE_LEARNING] = "learning", + [BR_STATE_FORWARDING] = "forwarding", + [BR_STATE_BLOCKING] = "blocking", +}; + +void br_log_state(const struct net_bridge_port *p) +{ + pr_info("%s: port %d(%s) entering %s state\n", + p->br->dev->name, p->port_no, p->dev->name, + br_port_state_names[p->state]); + +} + +/* called under bridge lock */ +struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) +{ + struct net_bridge_port *p; + + list_for_each_entry_rcu(p, &br->port_list, list) { + if (p->port_no == port_no) + return p; + } + + return NULL; +} + +/* called under bridge lock */ +static int br_should_become_root_port(const struct net_bridge_port *p, + u16 root_port) +{ + struct net_bridge *br; + struct net_bridge_port *rp; + int t; + + br = p->br; + if (p->state == BR_STATE_DISABLED || + br_is_designated_port(p)) + return 0; + + if (memcmp(&br->bridge_id, &p->designated_root, 8) <= 0) + return 0; + + if (!root_port) + return 1; + + rp = br_get_port(br, root_port); + + t = memcmp(&p->designated_root, &rp->designated_root, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (p->designated_cost + p->path_cost < + rp->designated_cost + rp->path_cost) + return 1; + else if (p->designated_cost + p->path_cost > + rp->designated_cost + rp->path_cost) + return 0; + + t = memcmp(&p->designated_bridge, &rp->designated_bridge, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (p->designated_port < rp->designated_port) + return 1; + else if (p->designated_port > rp->designated_port) + return 0; + + if (p->port_id < 
rp->port_id) + return 1; + + return 0; +} + +/* called under bridge lock */ +static void br_root_selection(struct net_bridge *br) +{ + struct net_bridge_port *p; + u16 root_port = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (br_should_become_root_port(p, root_port)) + root_port = p->port_no; + + } + + br->root_port = root_port; + + if (!root_port) { + br->designated_root = br->bridge_id; + br->root_path_cost = 0; + } else { + p = br_get_port(br, root_port); + br->designated_root = p->designated_root; + br->root_path_cost = p->designated_cost + p->path_cost; + } +} + +/* called under bridge lock */ +void br_become_root_bridge(struct net_bridge *br) +{ + br->max_age = br->bridge_max_age; + br->hello_time = br->bridge_hello_time; + br->forward_delay = br->bridge_forward_delay; + br_topology_change_detection(br); + del_timer(&br->tcn_timer); + + if (br->dev->flags & IFF_UP) { + br_config_bpdu_generation(br); + mod_timer(&br->hello_timer, jiffies + br->hello_time); + } +} + +/* called under bridge lock */ +void br_transmit_config(struct net_bridge_port *p) +{ + struct br_config_bpdu bpdu; + struct net_bridge *br; + + + if (timer_pending(&p->hold_timer)) { + p->config_pending = 1; + return; + } + + br = p->br; + + bpdu.topology_change = br->topology_change; + bpdu.topology_change_ack = p->topology_change_ack; + bpdu.root = br->designated_root; + bpdu.root_path_cost = br->root_path_cost; + bpdu.bridge_id = br->bridge_id; + bpdu.port_id = p->port_id; + if (br_is_root_bridge(br)) + bpdu.message_age = 0; + else { + struct net_bridge_port *root + = br_get_port(br, br->root_port); + bpdu.message_age = br->max_age + - (root->message_age_timer.expires - jiffies) + + MESSAGE_AGE_INCR; + } + bpdu.max_age = br->max_age; + bpdu.hello_time = br->hello_time; + bpdu.forward_delay = br->forward_delay; + + if (bpdu.message_age < br->max_age) { + br_send_config_bpdu(p, &bpdu); + p->topology_change_ack = 0; + p->config_pending = 0; + mod_timer(&p->hold_timer, jiffies + 
BR_HOLD_TIME); + } +} + +/* called under bridge lock */ +static inline void br_record_config_information(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) +{ + p->designated_root = bpdu->root; + p->designated_cost = bpdu->root_path_cost; + p->designated_bridge = bpdu->bridge_id; + p->designated_port = bpdu->port_id; + + mod_timer(&p->message_age_timer, jiffies + + (p->br->max_age - bpdu->message_age)); +} + +/* called under bridge lock */ +static inline void br_record_config_timeout_values(struct net_bridge *br, + const struct br_config_bpdu *bpdu) +{ + br->max_age = bpdu->max_age; + br->hello_time = bpdu->hello_time; + br->forward_delay = bpdu->forward_delay; + br->topology_change = bpdu->topology_change; +} + +/* called under bridge lock */ +void br_transmit_tcn(struct net_bridge *br) +{ + br_send_tcn_bpdu(br_get_port(br, br->root_port)); +} + +/* called under bridge lock */ +static int br_should_become_designated_port(const struct net_bridge_port *p) +{ + struct net_bridge *br; + int t; + + br = p->br; + if (br_is_designated_port(p)) + return 1; + + if (memcmp(&p->designated_root, &br->designated_root, 8)) + return 1; + + if (br->root_path_cost < p->designated_cost) + return 1; + else if (br->root_path_cost > p->designated_cost) + return 0; + + t = memcmp(&br->bridge_id, &p->designated_bridge, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (p->port_id < p->designated_port) + return 1; + + return 0; +} + +/* called under bridge lock */ +static void br_designated_port_selection(struct net_bridge *br) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED && + br_should_become_designated_port(p)) + br_become_designated_port(p); + + } +} + +/* called under bridge lock */ +static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +{ + int t; + + t = memcmp(&bpdu->root, &p->designated_root, 8); + if (t < 0) + return 1; + else if (t > 0) + 
return 0; + + if (bpdu->root_path_cost < p->designated_cost) + return 1; + else if (bpdu->root_path_cost > p->designated_cost) + return 0; + + t = memcmp(&bpdu->bridge_id, &p->designated_bridge, 8); + if (t < 0) + return 1; + else if (t > 0) + return 0; + + if (memcmp(&bpdu->bridge_id, &p->br->bridge_id, 8)) + return 1; + + if (bpdu->port_id <= p->designated_port) + return 1; + + return 0; +} + +/* called under bridge lock */ +static inline void br_topology_change_acknowledged(struct net_bridge *br) +{ + br->topology_change_detected = 0; + del_timer(&br->tcn_timer); +} + +/* called under bridge lock */ +void br_topology_change_detection(struct net_bridge *br) +{ + int isroot = br_is_root_bridge(br); + + pr_info("%s: topology change detected, %s\n", br->dev->name, + isroot ? "propagating" : "sending tcn bpdu"); + + if (isroot) { + br->topology_change = 1; + mod_timer(&br->topology_change_timer, jiffies + + br->bridge_forward_delay + br->bridge_max_age); + } else if (!br->topology_change_detected) { + br_transmit_tcn(br); + mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); + } + + br->topology_change_detected = 1; +} + +/* called under bridge lock */ +void br_config_bpdu_generation(struct net_bridge *br) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED && + br_is_designated_port(p)) + br_transmit_config(p); + } +} + +/* called under bridge lock */ +static inline void br_reply(struct net_bridge_port *p) +{ + br_transmit_config(p); +} + +/* called under bridge lock */ +void br_configuration_update(struct net_bridge *br) +{ + br_root_selection(br); + br_designated_port_selection(br); +} + +/* called under bridge lock */ +void br_become_designated_port(struct net_bridge_port *p) +{ + struct net_bridge *br; + + br = p->br; + p->designated_root = br->designated_root; + p->designated_cost = br->root_path_cost; + p->designated_bridge = br->bridge_id; + p->designated_port = p->port_id; +} + + +/* 
called under bridge lock */ +static void br_make_blocking(struct net_bridge_port *p) +{ + if (p->state != BR_STATE_DISABLED && + p->state != BR_STATE_BLOCKING) { + if (p->state == BR_STATE_FORWARDING || + p->state == BR_STATE_LEARNING) + br_topology_change_detection(p->br); + + p->state = BR_STATE_BLOCKING; + br_log_state(p); + del_timer(&p->forward_delay_timer); + } +} + +/* called under bridge lock */ +static void br_make_forwarding(struct net_bridge_port *p) +{ + if (p->state == BR_STATE_BLOCKING) { + if (p->br->stp_enabled) { + p->state = BR_STATE_LISTENING; + } else { + p->state = BR_STATE_LEARNING; + } + br_log_state(p); + mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); } +} + +/* called under bridge lock */ +void br_port_state_selection(struct net_bridge *br) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED) { + if (p->port_no == br->root_port) { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_forwarding(p); + } else if (br_is_designated_port(p)) { + del_timer(&p->message_age_timer); + br_make_forwarding(p); + } else { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_blocking(p); + } + } + + } +} + +/* called under bridge lock */ +static inline void br_topology_change_acknowledge(struct net_bridge_port *p) +{ + p->topology_change_ack = 1; + br_transmit_config(p); +} + +/* called under bridge lock */ +void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +{ + struct net_bridge *br; + int was_root; + + br = p->br; + was_root = br_is_root_bridge(br); + + if (br_supersedes_port_info(p, bpdu)) { + br_record_config_information(p, bpdu); + br_configuration_update(br); + br_port_state_selection(br); + + if (!br_is_root_bridge(br) && was_root) { + del_timer(&br->hello_timer); + if (br->topology_change_detected) { + del_timer(&br->topology_change_timer); + br_transmit_tcn(br); + + mod_timer(&br->tcn_timer, + jiffies 
+ br->bridge_hello_time); + } + } + + if (p->port_no == br->root_port) { + br_record_config_timeout_values(br, bpdu); + br_config_bpdu_generation(br); + if (bpdu->topology_change_ack) + br_topology_change_acknowledged(br); + } + } else if (br_is_designated_port(p)) { + br_reply(p); + } +} + +/* called under bridge lock */ +void br_received_tcn_bpdu(struct net_bridge_port *p) +{ + if (br_is_designated_port(p)) { + pr_info("%s: received tcn bpdu on port %i(%s)\n", + p->br->dev->name, p->port_no, p->dev->name); + + br_topology_change_detection(p->br); + br_topology_change_acknowledge(p); + } +} diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c new file mode 100644 index 000000000000..b91a875aca01 --- /dev/null +++ b/net/bridge/br_stp_bpdu.c @@ -0,0 +1,205 @@ +/* + * Spanning tree protocol; BPDU handling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_stp_bpdu.c,v 1.3 2001/11/10 02:35:25 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/netfilter_bridge.h> + +#include "br_private.h" +#include "br_private_stp.h" + +#define JIFFIES_TO_TICKS(j) (((j) << 8) / HZ) +#define TICKS_TO_JIFFIES(j) (((j) * HZ) >> 8) + +static void br_send_bpdu(struct net_bridge_port *p, unsigned char *data, int length) +{ + struct net_device *dev; + struct sk_buff *skb; + int size; + + if (!p->br->stp_enabled) + return; + + size = length + 2*ETH_ALEN + 2; + if (size < 60) + size = 60; + + dev = p->dev; + + if ((skb = dev_alloc_skb(size)) == NULL) { + printk(KERN_INFO "br: memory squeeze!\n"); + return; + } + + skb->dev = dev; + skb->protocol = htons(ETH_P_802_2); + skb->mac.raw = skb_put(skb, size); + memcpy(skb->mac.raw, bridge_ula, ETH_ALEN); + memcpy(skb->mac.raw+ETH_ALEN, dev->dev_addr, ETH_ALEN); + skb->mac.raw[2*ETH_ALEN] = 0; + skb->mac.raw[2*ETH_ALEN+1] = length; + skb->nh.raw = skb->mac.raw + 2*ETH_ALEN + 2; + memcpy(skb->nh.raw, data, length); + memset(skb->nh.raw + length, 0xa5, size - length - 2*ETH_ALEN - 2); + + NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + dev_queue_xmit); +} + +static __inline__ void br_set_ticks(unsigned char *dest, int jiff) +{ + __u16 ticks; + + ticks = JIFFIES_TO_TICKS(jiff); + dest[0] = (ticks >> 8) & 0xFF; + dest[1] = ticks & 0xFF; +} + +static __inline__ int br_get_ticks(unsigned char *dest) +{ + return TICKS_TO_JIFFIES((dest[0] << 8) | dest[1]); +} + +/* called under bridge lock */ +void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +{ + unsigned char buf[38]; + + buf[0] = 0x42; + buf[1] = 0x42; + buf[2] = 0x03; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = BPDU_TYPE_CONFIG; + buf[7] = (bpdu->topology_change ? 0x01 : 0) | + (bpdu->topology_change_ack ? 
0x80 : 0); + buf[8] = bpdu->root.prio[0]; + buf[9] = bpdu->root.prio[1]; + buf[10] = bpdu->root.addr[0]; + buf[11] = bpdu->root.addr[1]; + buf[12] = bpdu->root.addr[2]; + buf[13] = bpdu->root.addr[3]; + buf[14] = bpdu->root.addr[4]; + buf[15] = bpdu->root.addr[5]; + buf[16] = (bpdu->root_path_cost >> 24) & 0xFF; + buf[17] = (bpdu->root_path_cost >> 16) & 0xFF; + buf[18] = (bpdu->root_path_cost >> 8) & 0xFF; + buf[19] = bpdu->root_path_cost & 0xFF; + buf[20] = bpdu->bridge_id.prio[0]; + buf[21] = bpdu->bridge_id.prio[1]; + buf[22] = bpdu->bridge_id.addr[0]; + buf[23] = bpdu->bridge_id.addr[1]; + buf[24] = bpdu->bridge_id.addr[2]; + buf[25] = bpdu->bridge_id.addr[3]; + buf[26] = bpdu->bridge_id.addr[4]; + buf[27] = bpdu->bridge_id.addr[5]; + buf[28] = (bpdu->port_id >> 8) & 0xFF; + buf[29] = bpdu->port_id & 0xFF; + + br_set_ticks(buf+30, bpdu->message_age); + br_set_ticks(buf+32, bpdu->max_age); + br_set_ticks(buf+34, bpdu->hello_time); + br_set_ticks(buf+36, bpdu->forward_delay); + + br_send_bpdu(p, buf, 38); +} + +/* called under bridge lock */ +void br_send_tcn_bpdu(struct net_bridge_port *p) +{ + unsigned char buf[7]; + + buf[0] = 0x42; + buf[1] = 0x42; + buf[2] = 0x03; + buf[3] = 0; + buf[4] = 0; + buf[5] = 0; + buf[6] = BPDU_TYPE_TCN; + br_send_bpdu(p, buf, 7); +} + +static const unsigned char header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; + +/* NO locks */ +int br_stp_handle_bpdu(struct sk_buff *skb) +{ + struct net_bridge_port *p = skb->dev->br_port; + struct net_bridge *br = p->br; + unsigned char *buf; + + /* need at least the 802 and STP headers */ + if (!pskb_may_pull(skb, sizeof(header)+1) || + memcmp(skb->data, header, sizeof(header))) + goto err; + + buf = skb_pull(skb, sizeof(header)); + + spin_lock_bh(&br->lock); + if (p->state == BR_STATE_DISABLED + || !(br->dev->flags & IFF_UP) + || !br->stp_enabled) + goto out; + + if (buf[0] == BPDU_TYPE_CONFIG) { + struct br_config_bpdu bpdu; + + if (!pskb_may_pull(skb, 32)) + goto out; + + buf = skb->data; + 
bpdu.topology_change = (buf[1] & 0x01) ? 1 : 0; + bpdu.topology_change_ack = (buf[1] & 0x80) ? 1 : 0; + + bpdu.root.prio[0] = buf[2]; + bpdu.root.prio[1] = buf[3]; + bpdu.root.addr[0] = buf[4]; + bpdu.root.addr[1] = buf[5]; + bpdu.root.addr[2] = buf[6]; + bpdu.root.addr[3] = buf[7]; + bpdu.root.addr[4] = buf[8]; + bpdu.root.addr[5] = buf[9]; + bpdu.root_path_cost = + (buf[10] << 24) | + (buf[11] << 16) | + (buf[12] << 8) | + buf[13]; + bpdu.bridge_id.prio[0] = buf[14]; + bpdu.bridge_id.prio[1] = buf[15]; + bpdu.bridge_id.addr[0] = buf[16]; + bpdu.bridge_id.addr[1] = buf[17]; + bpdu.bridge_id.addr[2] = buf[18]; + bpdu.bridge_id.addr[3] = buf[19]; + bpdu.bridge_id.addr[4] = buf[20]; + bpdu.bridge_id.addr[5] = buf[21]; + bpdu.port_id = (buf[22] << 8) | buf[23]; + + bpdu.message_age = br_get_ticks(buf+24); + bpdu.max_age = br_get_ticks(buf+26); + bpdu.hello_time = br_get_ticks(buf+28); + bpdu.forward_delay = br_get_ticks(buf+30); + + br_received_config_bpdu(p, &bpdu); + } + + else if (buf[0] == BPDU_TYPE_TCN) { + br_received_tcn_bpdu(p); + } + out: + spin_unlock_bh(&br->lock); + err: + kfree_skb(skb); + return 0; +} diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c new file mode 100644 index 000000000000..0da11ff05fa3 --- /dev/null +++ b/net/bridge/br_stp_if.c @@ -0,0 +1,225 @@ +/* + * Spanning tree protocol; interface code + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_stp_if.c,v 1.4 2001/04/14 21:14:39 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/smp_lock.h> + +#include "br_private.h" +#include "br_private_stp.h" + + +/* Port id is composed of priority and port number. 
+ * NB: least significant bits of priority are dropped to + * make room for more ports. + */ +static inline port_id br_make_port_id(__u8 priority, __u16 port_no) +{ + return ((u16)priority << BR_PORT_BITS) + | (port_no & ((1<<BR_PORT_BITS)-1)); +} + +/* called under bridge lock */ +void br_init_port(struct net_bridge_port *p) +{ + p->port_id = br_make_port_id(p->priority, p->port_no); + br_become_designated_port(p); + p->state = BR_STATE_BLOCKING; + p->topology_change_ack = 0; + p->config_pending = 0; + + br_stp_port_timer_init(p); +} + +/* NO locks held: takes the bridge lock itself */ +void br_stp_enable_bridge(struct net_bridge *br) +{ + struct net_bridge_port *p; + + spin_lock_bh(&br->lock); + mod_timer(&br->hello_timer, jiffies + br->hello_time); + mod_timer(&br->gc_timer, jiffies + HZ/10); + + br_config_bpdu_generation(br); + + list_for_each_entry(p, &br->port_list, list) { + if ((p->dev->flags & IFF_UP) && netif_carrier_ok(p->dev)) + br_stp_enable_port(p); + + } + spin_unlock_bh(&br->lock); +} + +/* NO locks held; BHs are disabled because the STP timers also take br->lock */ +void br_stp_disable_bridge(struct net_bridge *br) +{ + struct net_bridge_port *p; + + spin_lock_bh(&br->lock); + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED) + br_stp_disable_port(p); + + } + + br->topology_change = 0; + br->topology_change_detected = 0; + spin_unlock_bh(&br->lock); + + del_timer_sync(&br->hello_timer); + del_timer_sync(&br->topology_change_timer); + del_timer_sync(&br->tcn_timer); + del_timer_sync(&br->gc_timer); +} + +/* called under bridge lock */ +void br_stp_enable_port(struct net_bridge_port *p) +{ + br_init_port(p); + br_port_state_selection(p->br); +} + +/* called under bridge lock */ +void br_stp_disable_port(struct net_bridge_port *p) +{ + struct net_bridge *br; + int wasroot; + + br = p->br; + printk(KERN_INFO "%s: port %i(%s) entering %s state\n", + br->dev->name, p->port_no, p->dev->name, "disabled"); + + wasroot = br_is_root_bridge(br); + br_become_designated_port(p); + p->state = BR_STATE_DISABLED; + 
p->topology_change_ack = 0; + p->config_pending = 0; + + del_timer(&p->message_age_timer); + del_timer(&p->forward_delay_timer); + del_timer(&p->hold_timer); + + br_configuration_update(br); + + br_port_state_selection(br); + + if (br_is_root_bridge(br) && !wasroot) + br_become_root_bridge(br); +} + +/* called under bridge lock */ +static void br_stp_change_bridge_id(struct net_bridge *br, + const unsigned char *addr) +{ + unsigned char oldaddr[6]; + struct net_bridge_port *p; + int wasroot; + + wasroot = br_is_root_bridge(br); + + memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN); + memcpy(br->bridge_id.addr, addr, ETH_ALEN); + memcpy(br->dev->dev_addr, addr, ETH_ALEN); + + list_for_each_entry(p, &br->port_list, list) { + if (!memcmp(p->designated_bridge.addr, oldaddr, ETH_ALEN)) + memcpy(p->designated_bridge.addr, addr, ETH_ALEN); + + if (!memcmp(p->designated_root.addr, oldaddr, ETH_ALEN)) + memcpy(p->designated_root.addr, addr, ETH_ALEN); + + } + + br_configuration_update(br); + br_port_state_selection(br); + if (br_is_root_bridge(br) && !wasroot) + br_become_root_bridge(br); +} + +static const unsigned char br_mac_zero[6]; + +/* called under bridge lock */ +void br_stp_recalculate_bridge_id(struct net_bridge *br) +{ + const unsigned char *addr = br_mac_zero; + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (addr == br_mac_zero || + memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) + addr = p->dev->dev_addr; + + } + + if (memcmp(br->bridge_id.addr, addr, ETH_ALEN)) + br_stp_change_bridge_id(br, addr); +} + +/* called under bridge lock */ +void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) +{ + struct net_bridge_port *p; + int wasroot; + + wasroot = br_is_root_bridge(br); + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED && + br_is_designated_port(p)) { + p->designated_bridge.prio[0] = (newprio >> 8) & 0xFF; + p->designated_bridge.prio[1] = newprio & 0xFF; + } + + } + + 
br->bridge_id.prio[0] = (newprio >> 8) & 0xFF; + br->bridge_id.prio[1] = newprio & 0xFF; + br_configuration_update(br); + br_port_state_selection(br); + if (br_is_root_bridge(br) && !wasroot) + br_become_root_bridge(br); +} + +/* called under bridge lock */ +void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) +{ + port_id new_port_id = br_make_port_id(newprio, p->port_no); + + if (br_is_designated_port(p)) + p->designated_port = new_port_id; + + p->port_id = new_port_id; + p->priority = newprio; + if (!memcmp(&p->br->bridge_id, &p->designated_bridge, 8) && + p->port_id < p->designated_port) { + br_become_designated_port(p); + br_port_state_selection(p->br); + } +} + +/* called under bridge lock */ +void br_stp_set_path_cost(struct net_bridge_port *p, u32 path_cost) +{ + p->path_cost = path_cost; + br_configuration_update(p->br); + br_port_state_selection(p->br); +} + +ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id) +{ + return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n", + id->prio[0], id->prio[1], + id->addr[0], id->addr[1], id->addr[2], + id->addr[3], id->addr[4], id->addr[5]); +} diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c new file mode 100644 index 000000000000..9bef55f56425 --- /dev/null +++ b/net/bridge/br_stp_timer.c @@ -0,0 +1,188 @@ +/* + * Spanning tree protocol; timer-related code + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * + * $Id: br_stp_timer.c,v 1.3 2000/05/05 02:17:17 davem Exp $ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/times.h> +#include <linux/smp_lock.h> + +#include "br_private.h" +#include "br_private_stp.h" + +/* called under bridge lock */ +static int br_is_designated_for_some_port(const struct net_bridge *br) +{ + struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + if (p->state != BR_STATE_DISABLED && + !memcmp(&p->designated_bridge, &br->bridge_id, 8)) + return 1; + } + + return 0; +} + +static void br_hello_timer_expired(unsigned long arg) +{ + struct net_bridge *br = (struct net_bridge *)arg; + + pr_debug("%s: hello timer expired\n", br->dev->name); + spin_lock_bh(&br->lock); + if (br->dev->flags & IFF_UP) { + br_config_bpdu_generation(br); + + mod_timer(&br->hello_timer, jiffies + br->hello_time); + } + spin_unlock_bh(&br->lock); +} + +static void br_message_age_timer_expired(unsigned long arg) +{ + struct net_bridge_port *p = (struct net_bridge_port *) arg; + struct net_bridge *br = p->br; + const bridge_id *id = &p->designated_bridge; + int was_root; + + if (p->state == BR_STATE_DISABLED) + return; + + + pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n", + br->dev->name, + id->prio[0], id->prio[1], + id->addr[0], id->addr[1], id->addr[2], + id->addr[3], id->addr[4], id->addr[5], + p->port_no, p->dev->name); + + /* + * According to the spec, the message age timer cannot be + * running when we are the root bridge. So.. this was_root + * check is redundant. I'm leaving it in for now, though. 
+ */ + spin_lock_bh(&br->lock); + if (p->state == BR_STATE_DISABLED) + goto unlock; + was_root = br_is_root_bridge(br); + + br_become_designated_port(p); + br_configuration_update(br); + br_port_state_selection(br); + if (br_is_root_bridge(br) && !was_root) + br_become_root_bridge(br); + unlock: + spin_unlock_bh(&br->lock); +} + +static void br_forward_delay_timer_expired(unsigned long arg) +{ + struct net_bridge_port *p = (struct net_bridge_port *) arg; + struct net_bridge *br = p->br; + + pr_debug("%s: %d(%s) forward delay timer\n", + br->dev->name, p->port_no, p->dev->name); + spin_lock_bh(&br->lock); + if (p->state == BR_STATE_LISTENING) { + p->state = BR_STATE_LEARNING; + mod_timer(&p->forward_delay_timer, + jiffies + br->forward_delay); + } else if (p->state == BR_STATE_LEARNING) { + p->state = BR_STATE_FORWARDING; + if (br_is_designated_for_some_port(br)) + br_topology_change_detection(br); + } + br_log_state(p); + spin_unlock_bh(&br->lock); +} + +static void br_tcn_timer_expired(unsigned long arg) +{ + struct net_bridge *br = (struct net_bridge *) arg; + + pr_debug("%s: tcn timer expired\n", br->dev->name); + spin_lock_bh(&br->lock); + if (br->dev->flags & IFF_UP) { + br_transmit_tcn(br); + + mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); + } + spin_unlock_bh(&br->lock); +} + +static void br_topology_change_timer_expired(unsigned long arg) +{ + struct net_bridge *br = (struct net_bridge *) arg; + + pr_debug("%s: topo change timer expired\n", br->dev->name); + spin_lock_bh(&br->lock); + br->topology_change_detected = 0; + br->topology_change = 0; + spin_unlock_bh(&br->lock); +} + +static void br_hold_timer_expired(unsigned long arg) +{ + struct net_bridge_port *p = (struct net_bridge_port *) arg; + + pr_debug("%s: %d(%s) hold timer expired\n", + p->br->dev->name, p->port_no, p->dev->name); + + spin_lock_bh(&p->br->lock); + if (p->config_pending) + br_transmit_config(p); + spin_unlock_bh(&p->br->lock); +} + +static inline void br_timer_init(struct 
timer_list *timer, + void (*_function)(unsigned long), + unsigned long _data) +{ + init_timer(timer); + timer->function = _function; + timer->data = _data; +} + +void br_stp_timer_init(struct net_bridge *br) +{ + br_timer_init(&br->hello_timer, br_hello_timer_expired, + (unsigned long) br); + + br_timer_init(&br->tcn_timer, br_tcn_timer_expired, + (unsigned long) br); + + br_timer_init(&br->topology_change_timer, + br_topology_change_timer_expired, + (unsigned long) br); + + br_timer_init(&br->gc_timer, br_fdb_cleanup, (unsigned long) br); +} + +void br_stp_port_timer_init(struct net_bridge_port *p) +{ + br_timer_init(&p->message_age_timer, br_message_age_timer_expired, + (unsigned long) p); + + br_timer_init(&p->forward_delay_timer, br_forward_delay_timer_expired, + (unsigned long) p); + + br_timer_init(&p->hold_timer, br_hold_timer_expired, + (unsigned long) p); +} + +/* Report ticks left until expiry (in USER_HZ), or 0 if the timer is not pending; used for API */ +unsigned long br_timer_value(const struct timer_list *timer) +{ + return timer_pending(timer) + ? jiffies_to_clock_t(timer->expires - jiffies) : 0; +} diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c new file mode 100644 index 000000000000..98cf53c81fad --- /dev/null +++ b/net/bridge/br_sysfs_br.c @@ -0,0 +1,364 @@ +/* + * Sysfs attributes of bridge + * Linux ethernet bridge + * + * Authors: + * Stephen Hemminger <shemminger@osdl.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/rtnetlink.h> +#include <linux/spinlock.h> +#include <linux/times.h> + +#include "br_private.h" + +#define to_class_dev(obj) container_of(obj,struct class_device,kobj) +#define to_net_dev(class) container_of(class, struct net_device, class_dev) +#define to_bridge(cd) ((struct net_bridge *)(to_net_dev(cd)->priv)) + +/* + * Common code for storing bridge parameters. + */ +static ssize_t store_bridge_parm(struct class_device *cd, + const char *buf, size_t len, + void (*set)(struct net_bridge *, unsigned long)) +{ + struct net_bridge *br = to_bridge(cd); + char *endp; + unsigned long val; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + spin_lock_bh(&br->lock); + (*set)(br, val); + spin_unlock_bh(&br->lock); + return len; +} + + +static ssize_t show_forward_delay(struct class_device *cd, char *buf) +{ + struct net_bridge *br = to_bridge(cd); + return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); +} + +static void set_forward_delay(struct net_bridge *br, unsigned long val) +{ + unsigned long delay = clock_t_to_jiffies(val); + br->forward_delay = delay; + if (br_is_root_bridge(br)) + br->bridge_forward_delay = delay; +} + +static ssize_t store_forward_delay(struct class_device *cd, const char *buf, + size_t len) +{ + return store_bridge_parm(cd, buf, len, set_forward_delay); +} +static CLASS_DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, + show_forward_delay, store_forward_delay); + +static ssize_t show_hello_time(struct class_device *cd, char *buf) +{ + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(to_bridge(cd)->hello_time)); +} + +static void set_hello_time(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + br->hello_time = t; + if (br_is_root_bridge(br)) + br->bridge_hello_time = t; +} + +static ssize_t 
store_hello_time(struct class_device *cd, const char *buf, + size_t len) +{ + return store_bridge_parm(cd, buf, len, set_hello_time); +} + +static CLASS_DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, + store_hello_time); + +static ssize_t show_max_age(struct class_device *cd, char *buf) +{ + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(to_bridge(cd)->max_age)); +} + +static void set_max_age(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + br->max_age = t; + if (br_is_root_bridge(br)) + br->bridge_max_age = t; +} + +static ssize_t store_max_age(struct class_device *cd, const char *buf, + size_t len) +{ + return store_bridge_parm(cd, buf, len, set_max_age); +} + +static CLASS_DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, + store_max_age); + +static ssize_t show_ageing_time(struct class_device *cd, char *buf) +{ + struct net_bridge *br = to_bridge(cd); + return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); +} + +static void set_ageing_time(struct net_bridge *br, unsigned long val) +{ + br->ageing_time = clock_t_to_jiffies(val); +} + +static ssize_t store_ageing_time(struct class_device *cd, const char *buf, + size_t len) +{ + return store_bridge_parm(cd, buf, len, set_ageing_time); +} + +static CLASS_DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, + store_ageing_time); +static ssize_t show_stp_state(struct class_device *cd, char *buf) +{ + struct net_bridge *br = to_bridge(cd); + return sprintf(buf, "%d\n", br->stp_enabled); +} + +static void set_stp_state(struct net_bridge *br, unsigned long val) +{ + br->stp_enabled = val; +} + +static ssize_t store_stp_state(struct class_device *cd, + const char *buf, size_t len) +{ + return store_bridge_parm(cd, buf, len, set_stp_state); +} + +static CLASS_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, + store_stp_state); + +static ssize_t show_priority(struct class_device *cd, char *buf) +{ + struct net_bridge *br = 
to_bridge(cd); + return sprintf(buf, "%d\n", + (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); +} + +static void set_priority(struct net_bridge *br, unsigned long val) +{ + br_stp_set_bridge_priority(br, (u16) val); +} + +static ssize_t store_priority(struct class_device *cd, + const char *buf, size_t len) +{ + return store_bridge_parm(cd, buf, len, set_priority); +} +static CLASS_DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, + store_priority); + +static ssize_t show_root_id(struct class_device *cd, char *buf) +{ + return br_show_bridge_id(buf, &to_bridge(cd)->designated_root); +} +static CLASS_DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); + +static ssize_t show_bridge_id(struct class_device *cd, char *buf) +{ + return br_show_bridge_id(buf, &to_bridge(cd)->bridge_id); +} +static CLASS_DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); + +static ssize_t show_root_port(struct class_device *cd, char *buf) +{ + return sprintf(buf, "%d\n", to_bridge(cd)->root_port); +} +static CLASS_DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); + +static ssize_t show_root_path_cost(struct class_device *cd, char *buf) +{ + return sprintf(buf, "%d\n", to_bridge(cd)->root_path_cost); +} +static CLASS_DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); + +static ssize_t show_topology_change(struct class_device *cd, char *buf) +{ + return sprintf(buf, "%d\n", to_bridge(cd)->topology_change); +} +static CLASS_DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); + +static ssize_t show_topology_change_detected(struct class_device *cd, char *buf) +{ + struct net_bridge *br = to_bridge(cd); + return sprintf(buf, "%d\n", br->topology_change_detected); +} +static CLASS_DEVICE_ATTR(topology_change_detected, S_IRUGO, show_topology_change_detected, NULL); + +static ssize_t show_hello_timer(struct class_device *cd, char *buf) +{ + struct net_bridge *br = to_bridge(cd); + return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); +} 
+static CLASS_DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); + +static ssize_t show_tcn_timer(struct class_device *cd, char *buf) +{ + struct net_bridge *br = to_bridge(cd); + return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); +} +static CLASS_DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); + +static ssize_t show_topology_change_timer(struct class_device *cd, char *buf) +{ + struct net_bridge *br = to_bridge(cd); + return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); +} +static CLASS_DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, NULL); + +static ssize_t show_gc_timer(struct class_device *cd, char *buf) +{ + struct net_bridge *br = to_bridge(cd); + return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); +} +static CLASS_DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); + +static struct attribute *bridge_attrs[] = { + &class_device_attr_forward_delay.attr, + &class_device_attr_hello_time.attr, + &class_device_attr_max_age.attr, + &class_device_attr_ageing_time.attr, + &class_device_attr_stp_state.attr, + &class_device_attr_priority.attr, + &class_device_attr_bridge_id.attr, + &class_device_attr_root_id.attr, + &class_device_attr_root_path_cost.attr, + &class_device_attr_root_port.attr, + &class_device_attr_topology_change.attr, + &class_device_attr_topology_change_detected.attr, + &class_device_attr_hello_timer.attr, + &class_device_attr_tcn_timer.attr, + &class_device_attr_topology_change_timer.attr, + &class_device_attr_gc_timer.attr, + NULL +}; + +static struct attribute_group bridge_group = { + .name = SYSFS_BRIDGE_ATTR, + .attrs = bridge_attrs, +}; + +/* + * Export the forwarding information table as a binary file + * The records are struct __fdb_entry. + * + * Returns the number of bytes read. 
+ */ +static ssize_t brforward_read(struct kobject *kobj, char *buf, + loff_t off, size_t count) +{ + struct class_device *cdev = to_class_dev(kobj); + struct net_bridge *br = to_bridge(cdev); + int n; + + /* must read whole records: the offset has to fall on a record boundary */ + if (off % sizeof(struct __fdb_entry) != 0) + return -EINVAL; + + n = br_fdb_fillbuf(br, buf, + count / sizeof(struct __fdb_entry), + off / sizeof(struct __fdb_entry)); + + if (n > 0) + n *= sizeof(struct __fdb_entry); + + return n; +} + +static struct bin_attribute bridge_forward = { + .attr = { .name = SYSFS_BRIDGE_FDB, + .mode = S_IRUGO, + .owner = THIS_MODULE, }, + .read = brforward_read, +}; + +/* + * Add entries in sysfs onto the existing network class device + * for the bridge. + * Adds an attribute group "bridge" containing tuning parameters. + * Binary attribute containing the forward table + * Sub directory to hold links to interfaces. + * + * Note: the ifobj exists only to be a subdirectory + * to hold links. The ifobj exists in the same data structure + * as its parent the bridge so reference counting works. 
+ */ +int br_sysfs_addbr(struct net_device *dev) +{ + struct kobject *brobj = &dev->class_dev.kobj; + struct net_bridge *br = netdev_priv(dev); + int err; + + err = sysfs_create_group(brobj, &bridge_group); + if (err) { + pr_info("%s: can't create group %s/%s\n", + __FUNCTION__, dev->name, bridge_group.name); + goto out1; + } + + err = sysfs_create_bin_file(brobj, &bridge_forward); + if (err) { + pr_info("%s: can't create attribute file %s/%s\n", + __FUNCTION__, dev->name, bridge_forward.attr.name); + goto out2; + } + + + kobject_set_name(&br->ifobj, SYSFS_BRIDGE_PORT_SUBDIR); + br->ifobj.ktype = NULL; + br->ifobj.kset = NULL; + br->ifobj.parent = brobj; + + err = kobject_register(&br->ifobj); + if (err) { + pr_info("%s: can't add kobject (directory) %s/%s\n", + __FUNCTION__, dev->name, br->ifobj.name); + goto out3; + } + return 0; + out3: + sysfs_remove_bin_file(&dev->class_dev.kobj, &bridge_forward); + out2: + sysfs_remove_group(&dev->class_dev.kobj, &bridge_group); + out1: + return err; + +} + +void br_sysfs_delbr(struct net_device *dev) +{ + struct kobject *kobj = &dev->class_dev.kobj; + struct net_bridge *br = netdev_priv(dev); + + kobject_unregister(&br->ifobj); + sysfs_remove_bin_file(kobj, &bridge_forward); + sysfs_remove_group(kobj, &bridge_group); +} diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c new file mode 100644 index 000000000000..567249bf9331 --- /dev/null +++ b/net/bridge/br_sysfs_if.c @@ -0,0 +1,269 @@ +/* + * Sysfs attributes of bridge ports + * Linux ethernet bridge + * + * Authors: + * Stephen Hemminger <shemminger@osdl.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/if_bridge.h> +#include <linux/rtnetlink.h> +#include <linux/spinlock.h> + +#include "br_private.h" + +struct brport_attribute { + struct attribute attr; + ssize_t (*show)(struct net_bridge_port *, char *); + ssize_t (*store)(struct net_bridge_port *, unsigned long); +}; + +#define BRPORT_ATTR(_name,_mode,_show,_store) \ +struct brport_attribute brport_attr_##_name = { \ + .attr = {.name = __stringify(_name), \ + .mode = _mode, \ + .owner = THIS_MODULE, }, \ + .show = _show, \ + .store = _store, \ +}; + +static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->path_cost); +} +static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v) +{ + br_stp_set_path_cost(p, v); + return 0; +} +static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, + show_path_cost, store_path_cost); + +static ssize_t show_priority(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->priority); +} +static ssize_t store_priority(struct net_bridge_port *p, unsigned long v) +{ + if (v >= (1<<(16-BR_PORT_BITS))) + return -ERANGE; + br_stp_set_port_priority(p, v); + return 0; +} +static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, + show_priority, store_priority); + +static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) +{ + return br_show_bridge_id(buf, &p->designated_root); +} +static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL); + +static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf) +{ + return br_show_bridge_id(buf, &p->designated_bridge); +} +static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL); + +static ssize_t show_designated_port(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->designated_port); +} +static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL); + +static ssize_t show_designated_cost(struct 
net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->designated_cost); +} +static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL); + +static ssize_t show_port_id(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "0x%x\n", p->port_id); +} +static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL); + +static ssize_t show_port_no(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "0x%x\n", p->port_no); +} + +static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL); + +static ssize_t show_change_ack(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->topology_change_ack); +} +static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL); + +static ssize_t show_config_pending(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->config_pending); +} +static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL); + +static ssize_t show_port_state(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->state); +} +static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL); + +static ssize_t show_message_age_timer(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer)); +} +static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL); + +static ssize_t show_forward_delay_timer(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer)); +} +static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL); + +static ssize_t show_hold_timer(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer)); +} +static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); + +static struct brport_attribute *brport_attrs[] = { + &brport_attr_path_cost, + &brport_attr_priority, + &brport_attr_port_id, + &brport_attr_port_no, + &brport_attr_designated_root, + 
&brport_attr_designated_bridge, + &brport_attr_designated_port, + &brport_attr_designated_cost, + &brport_attr_state, + &brport_attr_change_ack, + &brport_attr_config_pending, + &brport_attr_message_age_timer, + &brport_attr_forward_delay_timer, + &brport_attr_hold_timer, + NULL +}; + +#define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr) +#define to_brport(obj) container_of(obj, struct net_bridge_port, kobj) + +static ssize_t brport_show(struct kobject * kobj, + struct attribute * attr, char * buf) +{ + struct brport_attribute * brport_attr = to_brport_attr(attr); + struct net_bridge_port * p = to_brport(kobj); + + return brport_attr->show(p, buf); +} + +static ssize_t brport_store(struct kobject * kobj, + struct attribute * attr, + const char * buf, size_t count) +{ + struct brport_attribute * brport_attr = to_brport_attr(attr); + struct net_bridge_port * p = to_brport(kobj); + ssize_t ret = -EINVAL; + char *endp; + unsigned long val; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp != buf) { + rtnl_lock(); + if (p->dev && p->br && brport_attr->store) { + spin_lock_bh(&p->br->lock); + ret = brport_attr->store(p, val); + spin_unlock_bh(&p->br->lock); + if (ret == 0) + ret = count; + } + rtnl_unlock(); + } + return ret; +} + +/* called from kobject_put when port ref count goes to zero. */ +static void brport_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct net_bridge_port, kobj)); +} + +static struct sysfs_ops brport_sysfs_ops = { + .show = brport_show, + .store = brport_store, +}; + +static struct kobj_type brport_ktype = { + .sysfs_ops = &brport_sysfs_ops, + .release = brport_release, +}; + + +/* + * Add sysfs entries to ethernet device added to a bridge. + * Creates a brport subdirectory with bridge attributes. 
+ * Puts symlink in bridge's brport subdirectory + */ +int br_sysfs_addif(struct net_bridge_port *p) +{ + struct net_bridge *br = p->br; + struct brport_attribute **a; + int err; + + ASSERT_RTNL(); + + kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); + p->kobj.ktype = &brport_ktype; + p->kobj.parent = &(p->dev->class_dev.kobj); + p->kobj.kset = NULL; + + err = kobject_add(&p->kobj); + if(err) + goto out1; + + err = sysfs_create_link(&p->kobj, &br->dev->class_dev.kobj, + SYSFS_BRIDGE_PORT_LINK); + if (err) + goto out2; + + for (a = brport_attrs; *a; ++a) { + err = sysfs_create_file(&p->kobj, &((*a)->attr)); + if (err) + goto out2; + } + + err = sysfs_create_link(&br->ifobj, &p->kobj, p->dev->name); + if (err) + goto out2; + + return 0; + out2: + kobject_del(&p->kobj); + out1: + return err; +} + +void br_sysfs_removeif(struct net_bridge_port *p) +{ + pr_debug("br_sysfs_removeif\n"); + sysfs_remove_link(&p->br->ifobj, p->dev->name); + kobject_del(&p->kobj); +} + +void br_sysfs_freeif(struct net_bridge_port *p) +{ + pr_debug("br_sysfs_freeif\n"); + kobject_put(&p->kobj); +} diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig new file mode 100644 index 000000000000..68ccef507b49 --- /dev/null +++ b/net/bridge/netfilter/Kconfig @@ -0,0 +1,211 @@ +# +# Bridge netfilter configuration +# + +menu "Bridge: Netfilter Configuration" + depends on BRIDGE && NETFILTER + +config BRIDGE_NF_EBTABLES + tristate "Ethernet Bridge tables (ebtables) support" + help + ebtables is a general, extensible frame/packet identification + framework. Say 'Y' or 'M' here if you want to do Ethernet + filtering/NAT/brouting on the Ethernet bridge. +# +# tables +# +config BRIDGE_EBT_BROUTE + tristate "ebt: broute table support" + depends on BRIDGE_NF_EBTABLES + help + The ebtables broute table is used to define rules that decide between + bridging and routing frames, giving Linux the functionality of a + brouter. 
See the man page for ebtables(8) and examples on the ebtables + website. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_T_FILTER + tristate "ebt: filter table support" + depends on BRIDGE_NF_EBTABLES + help + The ebtables filter table is used to define frame filtering rules at + local input, forwarding and local output. See the man page for + ebtables(8). + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_T_NAT + tristate "ebt: nat table support" + depends on BRIDGE_NF_EBTABLES + help + The ebtables nat table is used to define rules that alter the MAC + source address (MAC SNAT) or the MAC destination address (MAC DNAT). + See the man page for ebtables(8). + + To compile it as a module, choose M here. If unsure, say N. +# +# matches +# +config BRIDGE_EBT_802_3 + tristate "ebt: 802.3 filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds matching support for 802.3 Ethernet frames. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_AMONG + tristate "ebt: among filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the among match, which allows matching the MAC source + and/or destination address on a list of addresses. Optionally, + MAC/IP address pairs can be matched, f.e. for anti-spoofing rules. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_ARP + tristate "ebt: ARP filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the ARP match, which allows ARP and RARP header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_IP + tristate "ebt: IP filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the IP match, which allows basic IP header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. 
+ +config BRIDGE_EBT_LIMIT + tristate "ebt: limit match support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the limit match, which allows you to control + the rate at which a rule can be matched. This match is the + equivalent of the iptables limit match. + + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + +config BRIDGE_EBT_MARK + tristate "ebt: mark filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the mark match, which allows matching frames based on + the 'nfmark' value in the frame. This can be set by the mark target. + This value is the same as the one used in the iptables mark match and + target. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_PKTTYPE + tristate "ebt: packet type filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the packet type match, which allows matching on the + type of packet based on its Ethernet "class" (as determined by + the generic networking code): broadcast, multicast, + for this host alone or for another host. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_STP + tristate "ebt: STP filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the Spanning Tree Protocol match, which + allows STP header field filtering. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_VLAN + tristate "ebt: 802.1Q VLAN filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the 802.1Q vlan match, which allows the filtering of + 802.1Q vlan fields. + + To compile it as a module, choose M here. If unsure, say N. +# +# targets +# +config BRIDGE_EBT_ARPREPLY + tristate "ebt: arp reply target support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the arp reply target, which allows + automatically sending arp replies to arp requests. 
+ + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_DNAT + tristate "ebt: dnat target support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the MAC DNAT target, which allows altering the MAC + destination address of frames. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_MARK_T + tristate "ebt: mark target support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the mark target, which allows marking frames by + setting the 'nfmark' value in the frame. + This value is the same as the one used in the iptables mark match and + target. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_REDIRECT + tristate "ebt: redirect target support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the MAC redirect target, which allows altering the MAC + destination address of a frame to that of the device it arrived on. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_SNAT + tristate "ebt: snat target support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the MAC SNAT target, which allows altering the MAC + source address of frames. + + To compile it as a module, choose M here. If unsure, say N. +# +# watchers +# +config BRIDGE_EBT_LOG + tristate "ebt: log support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the log watcher, that you can use in any rule + in any ebtables table. It records info about the frame header + to the syslog. + + To compile it as a module, choose M here. If unsure, say N. + +config BRIDGE_EBT_ULOG + tristate "ebt: ulog support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the ulog watcher, that you can use in any rule + in any ebtables table. The packet is passed to a userspace + logging daemon using netlink multicast sockets. 
This differs + from the log watcher in the sense that the complete packet is + sent to userspace instead of a descriptive text and that + netlink multicast sockets are used instead of the syslog. + + To compile it as a module, choose M here. If unsure, say N. + +endmenu diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile new file mode 100644 index 000000000000..8bf6d9f6e9d3 --- /dev/null +++ b/net/bridge/netfilter/Makefile @@ -0,0 +1,32 @@ +# +# Makefile for the netfilter modules for Link Layer filtering on a bridge. +# + +obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o + +# tables +obj-$(CONFIG_BRIDGE_EBT_BROUTE) += ebtable_broute.o +obj-$(CONFIG_BRIDGE_EBT_T_FILTER) += ebtable_filter.o +obj-$(CONFIG_BRIDGE_EBT_T_NAT) += ebtable_nat.o + +#matches +obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o +obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o +obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o +obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o +obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o +obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o +obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o +obj-$(CONFIG_BRIDGE_EBT_STP) += ebt_stp.o +obj-$(CONFIG_BRIDGE_EBT_VLAN) += ebt_vlan.o + +# targets +obj-$(CONFIG_BRIDGE_EBT_ARPREPLY) += ebt_arpreply.o +obj-$(CONFIG_BRIDGE_EBT_MARK_T) += ebt_mark.o +obj-$(CONFIG_BRIDGE_EBT_DNAT) += ebt_dnat.o +obj-$(CONFIG_BRIDGE_EBT_REDIRECT) += ebt_redirect.o +obj-$(CONFIG_BRIDGE_EBT_SNAT) += ebt_snat.o + +# watchers +obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_log.o +obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_ulog.o diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c new file mode 100644 index 000000000000..468ebdf4bc1c --- /dev/null +++ b/net/bridge/netfilter/ebt_802_3.c @@ -0,0 +1,73 @@ +/* + * 802_3 + * + * Author: + * Chris Vitale csv@bluetail.com + * + * May 2003 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_802_3.h> +#include <linux/module.h> + +static int 
ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const void *data, unsigned int datalen) +{ + struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; + struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); + uint16_t type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; + + if (info->bitmask & EBT_802_3_SAP) { + if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) + return EBT_NOMATCH; + if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP)) + return EBT_NOMATCH; + } + + if (info->bitmask & EBT_802_3_TYPE) { + if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) + return EBT_NOMATCH; + if (FWINV(info->type != type, EBT_802_3_TYPE)) + return EBT_NOMATCH; + } + + return EBT_MATCH; +} + +static struct ebt_match filter_802_3; +static int ebt_802_3_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; + + if (datalen < sizeof(struct ebt_802_3_info)) + return -EINVAL; + if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK) + return -EINVAL; + + return 0; +} + +static struct ebt_match filter_802_3 = +{ + .name = EBT_802_3_MATCH, + .match = ebt_filter_802_3, + .check = ebt_802_3_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_match(&filter_802_3); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&filter_802_3); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c new file mode 100644 index 000000000000..5a1f5e3bff15 --- /dev/null +++ b/net/bridge/netfilter/ebt_among.c @@ -0,0 +1,228 @@ +/* + * ebt_among + * + * Authors: + * Grzegorz Borowiak <grzes@gnu.univ.gda.pl> + * + * August, 2003 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include 
<linux/netfilter_bridge/ebt_among.h> +#include <linux/ip.h> +#include <linux/if_arp.h> +#include <linux/module.h> + +static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, + const char *mac, uint32_t ip) +{ + /* You may be puzzled as to how this code works. + * Some tricks were used, refer to + * include/linux/netfilter_bridge/ebt_among.h + * as there you can find a solution of this mystery. + */ + const struct ebt_mac_wormhash_tuple *p; + int start, limit, i; + uint32_t cmp[2] = { 0, 0 }; + int key = (const unsigned char) mac[5]; + + memcpy(((char *) cmp) + 2, mac, 6); + start = wh->table[key]; + limit = wh->table[key + 1]; + if (ip) { + for (i = start; i < limit; i++) { + p = &wh->pool[i]; + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { + if (p->ip == 0 || p->ip == ip) { + return 1; + } + } + } + } else { + for (i = start; i < limit; i++) { + p = &wh->pool[i]; + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { + if (p->ip == 0) { + return 1; + } + } + } + } + return 0; +} + +static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash + *wh) +{ + int i; + + for (i = 0; i < 256; i++) { + if (wh->table[i] > wh->table[i + 1]) + return -0x100 - i; + if (wh->table[i] < 0) + return -0x200 - i; + if (wh->table[i] > wh->poolsize) + return -0x300 - i; + } + if (wh->table[256] > wh->poolsize) + return -0xc00; + return 0; +} + +static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr) +{ + if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { + struct iphdr _iph, *ih; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) + return -1; + *addr = ih->daddr; + } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { + struct arphdr _arph, *ah; + uint32_t buf, *bp; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL || + ah->ar_pln != sizeof(uint32_t) || + ah->ar_hln != ETH_ALEN) + return -1; + bp = skb_header_pointer(skb, sizeof(struct arphdr) + + 2 * ETH_ALEN + sizeof(uint32_t), + 
sizeof(uint32_t), &buf); + if (bp == NULL) + return -1; + *addr = *bp; + } + return 0; +} + +static int get_ip_src(const struct sk_buff *skb, uint32_t *addr) +{ + if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { + struct iphdr _iph, *ih; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) + return -1; + *addr = ih->saddr; + } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { + struct arphdr _arph, *ah; + uint32_t buf, *bp; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL || + ah->ar_pln != sizeof(uint32_t) || + ah->ar_hln != ETH_ALEN) + return -1; + bp = skb_header_pointer(skb, sizeof(struct arphdr) + + ETH_ALEN, sizeof(uint32_t), &buf); + if (bp == NULL) + return -1; + *addr = *bp; + } + return 0; +} + +static int ebt_filter_among(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, const void *data, + unsigned int datalen) +{ + struct ebt_among_info *info = (struct ebt_among_info *) data; + const char *dmac, *smac; + const struct ebt_mac_wormhash *wh_dst, *wh_src; + uint32_t dip = 0, sip = 0; + + wh_dst = ebt_among_wh_dst(info); + wh_src = ebt_among_wh_src(info); + + if (wh_src) { + smac = eth_hdr(skb)->h_source; + if (get_ip_src(skb, &sip)) + return EBT_NOMATCH; + if (!(info->bitmask & EBT_AMONG_SRC_NEG)) { + /* we match only if it contains */ + if (!ebt_mac_wormhash_contains(wh_src, smac, sip)) + return EBT_NOMATCH; + } else { + /* we match only if it DOES NOT contain */ + if (ebt_mac_wormhash_contains(wh_src, smac, sip)) + return EBT_NOMATCH; + } + } + + if (wh_dst) { + dmac = eth_hdr(skb)->h_dest; + if (get_ip_dst(skb, &dip)) + return EBT_NOMATCH; + if (!(info->bitmask & EBT_AMONG_DST_NEG)) { + /* we match only if it contains */ + if (!ebt_mac_wormhash_contains(wh_dst, dmac, dip)) + return EBT_NOMATCH; + } else { + /* we match only if it DOES NOT contain */ + if (ebt_mac_wormhash_contains(wh_dst, dmac, dip)) + return EBT_NOMATCH; + } + } + + return EBT_MATCH; +} + 
+static int ebt_among_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, + unsigned int datalen) +{ + struct ebt_among_info *info = (struct ebt_among_info *) data; + int expected_length = sizeof(struct ebt_among_info); + const struct ebt_mac_wormhash *wh_dst, *wh_src; + int err; + + wh_dst = ebt_among_wh_dst(info); + wh_src = ebt_among_wh_src(info); + expected_length += ebt_mac_wormhash_size(wh_dst); + expected_length += ebt_mac_wormhash_size(wh_src); + + if (datalen != EBT_ALIGN(expected_length)) { + printk(KERN_WARNING + "ebtables: among: wrong size: %d" + "against expected %d, rounded to %Zd\n", + datalen, expected_length, + EBT_ALIGN(expected_length)); + return -EINVAL; + } + if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { + printk(KERN_WARNING + "ebtables: among: dst integrity fail: %x\n", -err); + return -EINVAL; + } + if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { + printk(KERN_WARNING + "ebtables: among: src integrity fail: %x\n", -err); + return -EINVAL; + } + return 0; +} + +static struct ebt_match filter_among = { + .name = EBT_AMONG_MATCH, + .match = ebt_filter_among, + .check = ebt_among_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_match(&filter_among); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&filter_among); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c new file mode 100644 index 000000000000..b94c48cb6e4b --- /dev/null +++ b/net/bridge/netfilter/ebt_arp.c @@ -0,0 +1,140 @@ +/* + * ebt_arp + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * Tim Gardner <timg@tpi.com> + * + * April, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arp.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/module.h> + +static int 
ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const void *data, unsigned int datalen) +{ + struct ebt_arp_info *info = (struct ebt_arp_info *)data; + struct arphdr _arph, *ah; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL) + return EBT_NOMATCH; + if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != + ah->ar_op, EBT_ARP_OPCODE)) + return EBT_NOMATCH; + if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != + ah->ar_hrd, EBT_ARP_HTYPE)) + return EBT_NOMATCH; + if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != + ah->ar_pro, EBT_ARP_PTYPE)) + return EBT_NOMATCH; + + if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) { + uint32_t _addr, *ap; + + /* IPv4 addresses are always 4 bytes */ + if (ah->ar_pln != sizeof(uint32_t)) + return EBT_NOMATCH; + if (info->bitmask & EBT_ARP_SRC_IP) { + ap = skb_header_pointer(skb, sizeof(struct arphdr) + + ah->ar_hln, sizeof(_addr), + &_addr); + if (ap == NULL) + return EBT_NOMATCH; + if (FWINV(info->saddr != (*ap & info->smsk), + EBT_ARP_SRC_IP)) + return EBT_NOMATCH; + } + + if (info->bitmask & EBT_ARP_DST_IP) { + ap = skb_header_pointer(skb, sizeof(struct arphdr) + + 2*ah->ar_hln+sizeof(uint32_t), + sizeof(_addr), &_addr); + if (ap == NULL) + return EBT_NOMATCH; + if (FWINV(info->daddr != (*ap & info->dmsk), + EBT_ARP_DST_IP)) + return EBT_NOMATCH; + } + } + + if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { + unsigned char _mac[ETH_ALEN], *mp; + uint8_t verdict, i; + + /* MAC addresses are 6 bytes */ + if (ah->ar_hln != ETH_ALEN) + return EBT_NOMATCH; + if (info->bitmask & EBT_ARP_SRC_MAC) { + mp = skb_header_pointer(skb, sizeof(struct arphdr), + sizeof(_mac), &_mac); + if (mp == NULL) + return EBT_NOMATCH; + verdict = 0; + for (i = 0; i < 6; i++) + verdict |= (mp[i] ^ info->smaddr[i]) & + info->smmsk[i]; + if (FWINV(verdict != 0, EBT_ARP_SRC_MAC)) + return EBT_NOMATCH; + } + + if (info->bitmask & EBT_ARP_DST_MAC) { 
+ mp = skb_header_pointer(skb, sizeof(struct arphdr) + + ah->ar_hln + ah->ar_pln, + sizeof(_mac), &_mac); + if (mp == NULL) + return EBT_NOMATCH; + verdict = 0; + for (i = 0; i < 6; i++) + verdict |= (mp[i] ^ info->dmaddr[i]) & + info->dmmsk[i]; + if (FWINV(verdict != 0, EBT_ARP_DST_MAC)) + return EBT_NOMATCH; + } + } + + return EBT_MATCH; +} + +static int ebt_arp_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_arp_info *info = (struct ebt_arp_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info))) + return -EINVAL; + if ((e->ethproto != htons(ETH_P_ARP) && + e->ethproto != htons(ETH_P_RARP)) || + e->invflags & EBT_IPROTO) + return -EINVAL; + if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK) + return -EINVAL; + return 0; +} + +static struct ebt_match filter_arp = +{ + .name = EBT_ARP_MATCH, + .match = ebt_filter_arp, + .check = ebt_arp_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_match(&filter_arp); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&filter_arp); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c new file mode 100644 index 000000000000..b934de90f7c5 --- /dev/null +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -0,0 +1,97 @@ +/* + * ebt_arpreply + * + * Authors: + * Grzegorz Borowiak <grzes@gnu.univ.gda.pl> + * Bart De Schuymer <bdschuym@pandora.be> + * + * August, 2003 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arpreply.h> +#include <linux/if_arp.h> +#include <net/arp.h> +#include <linux/module.h> + +static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_arpreply_info *info = (struct 
ebt_arpreply_info *)data; + u32 _sip, *siptr, _dip, *diptr; + struct arphdr _ah, *ap; + unsigned char _sha[ETH_ALEN], *shp; + struct sk_buff *skb = *pskb; + + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); + if (ap == NULL) + return EBT_DROP; + + if (ap->ar_op != htons(ARPOP_REQUEST) || + ap->ar_hln != ETH_ALEN || + ap->ar_pro != htons(ETH_P_IP) || + ap->ar_pln != 4) + return EBT_CONTINUE; + + shp = skb_header_pointer(skb, sizeof(_ah), ETH_ALEN, &_sha); + if (shp == NULL) + return EBT_DROP; + + siptr = skb_header_pointer(skb, sizeof(_ah) + ETH_ALEN, + sizeof(_sip), &_sip); + if (siptr == NULL) + return EBT_DROP; + + diptr = skb_header_pointer(skb, + sizeof(_ah) + 2 * ETH_ALEN + sizeof(_sip), + sizeof(_dip), &_dip); + if (diptr == NULL) + return EBT_DROP; + + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in, + *diptr, shp, info->mac, shp); + + return info->target; +} + +static int ebt_target_reply_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info))) + return -EINVAL; + if (BASE_CHAIN && info->target == EBT_RETURN) + return -EINVAL; + if (e->ethproto != htons(ETH_P_ARP) || + e->invflags & EBT_IPROTO) + return -EINVAL; + CLEAR_BASE_CHAIN_BIT; + if (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) + return -EINVAL; + return 0; +} + +static struct ebt_target reply_target = +{ + .name = EBT_ARPREPLY_TARGET, + .target = ebt_target_reply, + .check = ebt_target_reply_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_target(&reply_target); +} + +static void __exit fini(void) +{ + ebt_unregister_target(&reply_target); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c new file mode 100644 index 
000000000000..f5463086c7bd --- /dev/null +++ b/net/bridge/netfilter/ebt_dnat.c @@ -0,0 +1,76 @@ +/* + * ebt_dnat + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * June, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> +#include <linux/module.h> +#include <net/sock.h> + +static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_nat_info *info = (struct ebt_nat_info *)data; + + if (skb_shared(*pskb) || skb_cloned(*pskb)) { + struct sk_buff *nskb; + + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return NF_DROP; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN); + return info->target; +} + +static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_nat_info *info = (struct ebt_nat_info *)data; + + if (BASE_CHAIN && info->target == EBT_RETURN) + return -EINVAL; + CLEAR_BASE_CHAIN_BIT; + if ( (strcmp(tablename, "nat") || + (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) && + (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) + return -EINVAL; + if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) + return -EINVAL; + if (INVALID_TARGET) + return -EINVAL; + return 0; +} + +static struct ebt_target dnat = +{ + .name = EBT_DNAT_TARGET, + .target = ebt_target_dnat, + .check = ebt_target_dnat_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_target(&dnat); +} + +static void __exit fini(void) +{ + ebt_unregister_target(&dnat); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c new file mode 100644 index 
000000000000..7323805b9726 --- /dev/null +++ b/net/bridge/netfilter/ebt_ip.c @@ -0,0 +1,122 @@ +/* + * ebt_ip + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + * Changes: + * added ip-sport and ip-dport + * Innominate Security Technologies AG <mhopf@innominate.com> + * September, 2002 + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/module.h> + +struct tcpudphdr { + uint16_t src; + uint16_t dst; +}; + +static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const void *data, + unsigned int datalen) +{ + struct ebt_ip_info *info = (struct ebt_ip_info *)data; + struct iphdr _iph, *ih; + struct tcpudphdr _ports, *pptr; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP_TOS && + FWINV(info->tos != ih->tos, EBT_IP_TOS)) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP_SOURCE && + FWINV((ih->saddr & info->smsk) != + info->saddr, EBT_IP_SOURCE)) + return EBT_NOMATCH; + if ((info->bitmask & EBT_IP_DEST) && + FWINV((ih->daddr & info->dmsk) != + info->daddr, EBT_IP_DEST)) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP_PROTO) { + if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) + return EBT_NOMATCH; + if (!(info->bitmask & EBT_IP_DPORT) && + !(info->bitmask & EBT_IP_SPORT)) + return EBT_MATCH; + pptr = skb_header_pointer(skb, ih->ihl*4, + sizeof(_ports), &_ports); + if (pptr == NULL) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP_DPORT) { + u32 dst = ntohs(pptr->dst); + if (FWINV(dst < info->dport[0] || + dst > info->dport[1], + EBT_IP_DPORT)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_IP_SPORT) { + u32 src = ntohs(pptr->src); + if (FWINV(src < info->sport[0] || + src > info->sport[1], + EBT_IP_SPORT)) + return EBT_NOMATCH; + } + } + return EBT_MATCH; +} + +static int 
ebt_ip_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_ip_info *info = (struct ebt_ip_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info))) + return -EINVAL; + if (e->ethproto != htons(ETH_P_IP) || + e->invflags & EBT_IPROTO) + return -EINVAL; + if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK) + return -EINVAL; + if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) { + if (info->invflags & EBT_IP_PROTO) + return -EINVAL; + if (info->protocol != IPPROTO_TCP && + info->protocol != IPPROTO_UDP) + return -EINVAL; + } + if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1]) + return -EINVAL; + return 0; +} + +static struct ebt_match filter_ip = +{ + .name = EBT_IP_MATCH, + .match = ebt_filter_ip, + .check = ebt_ip_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_match(&filter_ip); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&filter_ip); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c new file mode 100644 index 000000000000..637c8844cd5f --- /dev/null +++ b/net/bridge/netfilter/ebt_limit.c @@ -0,0 +1,113 @@ +/* + * ebt_limit + * + * Authors: + * Tom Marshall <tommy@home.tig-grr.com> + * + * Mostly copied from netfilter's ipt_limit.c, see that file for + * more explanation + * + * September, 2003 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_limit.h> +#include <linux/module.h> + +#include <linux/netdevice.h> +#include <linux/spinlock.h> + +static DEFINE_SPINLOCK(limit_lock); + +#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24)) + +#define _POW2_BELOW2(x) ((x)|((x)>>1)) +#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2)) +#define _POW2_BELOW8(x) 
(_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4)) +#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8)) +#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) +#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) + +#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) + +static int ebt_limit_match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_limit_info *info = (struct ebt_limit_info *)data; + unsigned long now = jiffies; + + spin_lock_bh(&limit_lock); + info->credit += (now - xchg(&info->prev, now)) * CREDITS_PER_JIFFY; + if (info->credit > info->credit_cap) + info->credit = info->credit_cap; + + if (info->credit >= info->cost) { + /* We're not limited. */ + info->credit -= info->cost; + spin_unlock_bh(&limit_lock); + return EBT_MATCH; + } + + spin_unlock_bh(&limit_lock); + return EBT_NOMATCH; +} + +/* Precision saver. */ +static u_int32_t +user2credits(u_int32_t user) +{ + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + /* Divide first. */ + return (user / EBT_LIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; + + return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE; +} + +static int ebt_limit_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_limit_info *info = (struct ebt_limit_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info))) + return -EINVAL; + + /* Check for overflow. */ + if (info->burst == 0 || + user2credits(info->avg * info->burst) < user2credits(info->avg)) { + printk("Overflow in ebt_limit, try lower: %u/%u\n", + info->avg, info->burst); + return -EINVAL; + } + + /* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. 
*/ + info->prev = jiffies; + info->credit = user2credits(info->avg * info->burst); + info->credit_cap = user2credits(info->avg * info->burst); + info->cost = user2credits(info->avg); + return 0; +} + +static struct ebt_match ebt_limit_reg = +{ + .name = EBT_LIMIT_MATCH, + .match = ebt_limit_match, + .check = ebt_limit_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_match(&ebt_limit_reg); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&ebt_limit_reg); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c new file mode 100644 index 000000000000..e4ae34b88925 --- /dev/null +++ b/net/bridge/netfilter/ebt_log.c @@ -0,0 +1,171 @@ +/* + * ebt_log + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_log.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/if_arp.h> +#include <linux/spinlock.h> + +static DEFINE_SPINLOCK(ebt_log_lock); + +static int ebt_log_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_log_info *info = (struct ebt_log_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info))) + return -EINVAL; + if (info->bitmask & ~EBT_LOG_MASK) + return -EINVAL; + if (info->loglevel >= 8) + return -EINVAL; + info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0'; + return 0; +} + +struct tcpudphdr +{ + uint16_t src; + uint16_t dst; +}; + +struct arppayload +{ + unsigned char mac_src[ETH_ALEN]; + unsigned char ip_src[4]; + unsigned char mac_dst[ETH_ALEN]; + unsigned char ip_dst[4]; +}; + +static void print_MAC(unsigned char *p) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++, p++) + printk("%02x%c", *p, i == ETH_ALEN - 1 ? 
' ':':'); +} + +#define myNIPQUAD(a) a[0], a[1], a[2], a[3] +static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_log_info *info = (struct ebt_log_info *)data; + char level_string[4] = "< >"; + union {struct iphdr iph; struct tcpudphdr ports; + struct arphdr arph; struct arppayload arpp;} u; + + level_string[1] = '0' + info->loglevel; + spin_lock_bh(&ebt_log_lock); + printk(level_string); + printk("%s IN=%s OUT=%s ", info->prefix, in ? in->name : "", + out ? out->name : ""); + + printk("MAC source = "); + print_MAC(eth_hdr(skb)->h_source); + printk("MAC dest = "); + print_MAC(eth_hdr(skb)->h_dest); + + printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto)); + + if ((info->bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == + htons(ETH_P_IP)){ + struct iphdr _iph, *ih; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) { + printk(" INCOMPLETE IP header"); + goto out; + } + printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u,", + NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); + printk(" IP tos=0x%02X, IP proto=%d", u.iph.tos, + ih->protocol); + if (ih->protocol == IPPROTO_TCP || + ih->protocol == IPPROTO_UDP) { + struct tcpudphdr _ports, *pptr; + + pptr = skb_header_pointer(skb, ih->ihl*4, + sizeof(_ports), &_ports); + if (pptr == NULL) { + printk(" INCOMPLETE TCP/UDP header"); + goto out; + } + printk(" SPT=%u DPT=%u", ntohs(pptr->src), + ntohs(pptr->dst)); + } + goto out; + } + + if ((info->bitmask & EBT_LOG_ARP) && + ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || + (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { + struct arphdr _arph, *ah; + + ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + if (ah == NULL) { + printk(" INCOMPLETE ARP header"); + goto out; + } + printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d", + ntohs(ah->ar_hrd), ntohs(ah->ar_pro), + ntohs(ah->ar_op)); + + /* If it's for Ethernet and the 
lengths are OK, + * then log the ARP payload */ + if (ah->ar_hrd == htons(1) && + ah->ar_hln == ETH_ALEN && + ah->ar_pln == sizeof(uint32_t)) { + struct arppayload _arpp, *ap; + + ap = skb_header_pointer(skb, sizeof(u.arph), + sizeof(_arpp), &_arpp); + if (ap == NULL) { + printk(" INCOMPLETE ARP payload"); + goto out; + } + printk(" ARP MAC SRC="); + print_MAC(ap->mac_src); + printk(" ARP IP SRC=%u.%u.%u.%u", + myNIPQUAD(ap->ip_src)); + printk(" ARP MAC DST="); + print_MAC(ap->mac_dst); + printk(" ARP IP DST=%u.%u.%u.%u", + myNIPQUAD(ap->ip_dst)); + } + } +out: + printk("\n"); + spin_unlock_bh(&ebt_log_lock); +} + +static struct ebt_watcher log = +{ + .name = EBT_LOG_WATCHER, + .watcher = ebt_log, + .check = ebt_log_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_watcher(&log); +} + +static void __exit fini(void) +{ + ebt_unregister_watcher(&log); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c new file mode 100644 index 000000000000..02c632b4d325 --- /dev/null +++ b/net/bridge/netfilter/ebt_mark.c @@ -0,0 +1,68 @@ +/* + * ebt_mark + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * July, 2002 + * + */ + +/* The mark target can be used in any chain, + * I believe adding a mangle table just for marking is total overkill. + * Marking a frame doesn't really change anything in the frame anyway. 
+ */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_mark_t.h> +#include <linux/module.h> + +static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + + if ((*pskb)->nfmark != info->mark) { + (*pskb)->nfmark = info->mark; + (*pskb)->nfcache |= NFC_ALTERED; + } + return info->target; +} + +static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) + return -EINVAL; + if (BASE_CHAIN && info->target == EBT_RETURN) + return -EINVAL; + CLEAR_BASE_CHAIN_BIT; + if (INVALID_TARGET) + return -EINVAL; + return 0; +} + +static struct ebt_target mark_target = +{ + .name = EBT_MARK_TARGET, + .target = ebt_target_mark, + .check = ebt_target_mark_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_target(&mark_target); +} + +static void __exit fini(void) +{ + ebt_unregister_target(&mark_target); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c new file mode 100644 index 000000000000..625102de1495 --- /dev/null +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -0,0 +1,62 @@ +/* + * ebt_mark_m + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * July, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_mark_m.h> +#include <linux/module.h> + +static int ebt_filter_mark(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, const void *data, + unsigned int datalen) +{ + struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) 
data; + + if (info->bitmask & EBT_MARK_OR) + return !(!!(skb->nfmark & info->mask) ^ info->invert); + return !(((skb->nfmark & info->mask) == info->mark) ^ info->invert); +} + +static int ebt_mark_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info))) + return -EINVAL; + if (info->bitmask & ~EBT_MARK_MASK) + return -EINVAL; + if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND)) + return -EINVAL; + if (!info->bitmask) + return -EINVAL; + return 0; +} + +static struct ebt_match filter_mark = +{ + .name = EBT_MARK_MATCH, + .match = ebt_filter_mark, + .check = ebt_mark_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_match(&filter_mark); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&filter_mark); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c new file mode 100644 index 000000000000..ecd3b42b19b0 --- /dev/null +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -0,0 +1,59 @@ +/* + * ebt_pkttype + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2003 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_pkttype.h> +#include <linux/module.h> + +static int ebt_filter_pkttype(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *data, + unsigned int datalen) +{ + struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data; + + return (skb->pkt_type != info->pkt_type) ^ info->invert; +} + +static int ebt_pkttype_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data; + + if 
(datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info))) + return -EINVAL; + if (info->invert != 0 && info->invert != 1) + return -EINVAL; + /* Allow any pkt_type value */ + return 0; +} + +static struct ebt_match filter_pkttype = +{ + .name = EBT_PKTTYPE_MATCH, + .match = ebt_filter_pkttype, + .check = ebt_pkttype_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_match(&filter_pkttype); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&filter_pkttype); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c new file mode 100644 index 000000000000..1538b4386662 --- /dev/null +++ b/net/bridge/netfilter/ebt_redirect.c @@ -0,0 +1,81 @@ +/* + * ebt_redirect + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_redirect.h> +#include <linux/module.h> +#include <net/sock.h> +#include "../br_private.h" + +static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; + + if (skb_shared(*pskb) || skb_cloned(*pskb)) { + struct sk_buff *nskb; + + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return NF_DROP; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + if (hooknr != NF_BR_BROUTING) + memcpy(eth_hdr(*pskb)->h_dest, + in->br_port->br->dev->dev_addr, ETH_ALEN); + else + memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN); + (*pskb)->pkt_type = PACKET_HOST; + return info->target; +} + +static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_redirect_info *info = 
(struct ebt_redirect_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info))) + return -EINVAL; + if (BASE_CHAIN && info->target == EBT_RETURN) + return -EINVAL; + CLEAR_BASE_CHAIN_BIT; + if ( (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) && + (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) + return -EINVAL; + if (INVALID_TARGET) + return -EINVAL; + return 0; +} + +static struct ebt_target redirect_target = +{ + .name = EBT_REDIRECT_TARGET, + .target = ebt_target_redirect, + .check = ebt_target_redirect_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_target(&redirect_target); +} + +static void __exit fini(void) +{ + ebt_unregister_target(&redirect_target); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c new file mode 100644 index 000000000000..1529bdcb9a48 --- /dev/null +++ b/net/bridge/netfilter/ebt_snat.c @@ -0,0 +1,76 @@ +/* + * ebt_snat + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * June, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> +#include <linux/module.h> +#include <net/sock.h> + +static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_nat_info *info = (struct ebt_nat_info *) data; + + if (skb_shared(*pskb) || skb_cloned(*pskb)) { + struct sk_buff *nskb; + + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return NF_DROP; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN); + return info->target; +} + +static int ebt_target_snat_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ 
+ struct ebt_nat_info *info = (struct ebt_nat_info *) data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) + return -EINVAL; + if (BASE_CHAIN && info->target == EBT_RETURN) + return -EINVAL; + CLEAR_BASE_CHAIN_BIT; + if (strcmp(tablename, "nat")) + return -EINVAL; + if (hookmask & ~(1 << NF_BR_POST_ROUTING)) + return -EINVAL; + if (INVALID_TARGET) + return -EINVAL; + return 0; +} + +static struct ebt_target snat = +{ + .name = EBT_SNAT_TARGET, + .target = ebt_target_snat, + .check = ebt_target_snat_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_target(&snat); +} + +static void __exit fini(void) +{ + ebt_unregister_target(&snat); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c new file mode 100644 index 000000000000..f8a8cdec16ee --- /dev/null +++ b/net/bridge/netfilter/ebt_stp.c @@ -0,0 +1,194 @@ +/* + * ebt_stp + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * Stephen Hemminger <shemminger@osdl.org> + * + * July, 2003 + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_stp.h> +#include <linux/module.h> + +#define BPDU_TYPE_CONFIG 0 +#define BPDU_TYPE_TCN 0x80 + +struct stp_header { + uint8_t dsap; + uint8_t ssap; + uint8_t ctrl; + uint8_t pid; + uint8_t vers; + uint8_t type; +}; + +struct stp_config_pdu { + uint8_t flags; + uint8_t root[8]; + uint8_t root_cost[4]; + uint8_t sender[8]; + uint8_t port[2]; + uint8_t msg_age[2]; + uint8_t max_age[2]; + uint8_t hello_time[2]; + uint8_t forward_delay[2]; +}; + +#define NR16(p) (p[0] << 8 | p[1]) +#define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) + +static int ebt_filter_config(struct ebt_stp_info *info, + struct stp_config_pdu *stpc) +{ + struct ebt_stp_config_info *c; + uint16_t v16; + uint32_t v32; + int verdict, i; + + c = &info->config; + if ((info->bitmask & EBT_STP_FLAGS) && + FWINV(c->flags != 
stpc->flags, EBT_STP_FLAGS)) + return EBT_NOMATCH; + if (info->bitmask & EBT_STP_ROOTPRIO) { + v16 = NR16(stpc->root); + if (FWINV(v16 < c->root_priol || + v16 > c->root_priou, EBT_STP_ROOTPRIO)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_ROOTADDR) { + verdict = 0; + for (i = 0; i < 6; i++) + verdict |= (stpc->root[2+i] ^ c->root_addr[i]) & + c->root_addrmsk[i]; + if (FWINV(verdict != 0, EBT_STP_ROOTADDR)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_ROOTCOST) { + v32 = NR32(stpc->root_cost); + if (FWINV(v32 < c->root_costl || + v32 > c->root_costu, EBT_STP_ROOTCOST)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_SENDERPRIO) { + v16 = NR16(stpc->sender); + if (FWINV(v16 < c->sender_priol || + v16 > c->sender_priou, EBT_STP_SENDERPRIO)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_SENDERADDR) { + verdict = 0; + for (i = 0; i < 6; i++) + verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) & + c->sender_addrmsk[i]; + if (FWINV(verdict != 0, EBT_STP_SENDERADDR)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_PORT) { + v16 = NR16(stpc->port); + if (FWINV(v16 < c->portl || + v16 > c->portu, EBT_STP_PORT)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_MSGAGE) { + v16 = NR16(stpc->msg_age); + if (FWINV(v16 < c->msg_agel || + v16 > c->msg_ageu, EBT_STP_MSGAGE)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_MAXAGE) { + v16 = NR16(stpc->max_age); + if (FWINV(v16 < c->max_agel || + v16 > c->max_ageu, EBT_STP_MAXAGE)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_HELLOTIME) { + v16 = NR16(stpc->hello_time); + if (FWINV(v16 < c->hello_timel || + v16 > c->hello_timeu, EBT_STP_HELLOTIME)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_STP_FWDD) { + v16 = NR16(stpc->forward_delay); + if (FWINV(v16 < c->forward_delayl || + v16 > c->forward_delayu, EBT_STP_FWDD)) + return EBT_NOMATCH; + } + return EBT_MATCH; +} + +static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in, + 
const struct net_device *out, const void *data, unsigned int datalen) +{ + struct ebt_stp_info *info = (struct ebt_stp_info *)data; + struct stp_header _stph, *sp; + uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; + + sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); + if (sp == NULL) + return EBT_NOMATCH; + + /* The stp code only considers these */ + if (memcmp(sp, header, sizeof(header))) + return EBT_NOMATCH; + + if (info->bitmask & EBT_STP_TYPE + && FWINV(info->type != sp->type, EBT_STP_TYPE)) + return EBT_NOMATCH; + + if (sp->type == BPDU_TYPE_CONFIG && + info->bitmask & EBT_STP_CONFIG_MASK) { + struct stp_config_pdu _stpc, *st; + + st = skb_header_pointer(skb, sizeof(_stph), + sizeof(_stpc), &_stpc); + if (st == NULL) + return EBT_NOMATCH; + return ebt_filter_config(info, st); + } + return EBT_MATCH; +} + +/* Validate the match data of a rule: returns 0 when usable, -EINVAL otherwise */ +static int ebt_stp_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_stp_info *info = (struct ebt_stp_info *)data; + int len = EBT_ALIGN(sizeof(struct ebt_stp_info)); + uint8_t bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; + uint8_t msk[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + + /* Check the size first: no field of info may be read from a short blob */ + if (datalen != len) + return -EINVAL; + /* Only known flag bits may be set, and at least one must be selected */ + if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || + !(info->bitmask & EBT_STP_MASK)) + return -EINVAL; + /* Make sure the match only receives stp frames */ + if (memcmp(e->destmac, bridge_ula, ETH_ALEN) || + memcmp(e->destmsk, msk, ETH_ALEN) || !(e->bitmask & EBT_DESTMAC)) + return -EINVAL; + + return 0; +} + +static struct ebt_match filter_stp = +{ + .name = EBT_STP_MATCH, + .match = ebt_filter_stp, + .check = ebt_stp_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ebt_register_match(&filter_stp); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&filter_stp); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_ulog.c 
b/net/bridge/netfilter/ebt_ulog.c new file mode 100644 index 000000000000..01af4fcef26d --- /dev/null +++ b/net/bridge/netfilter/ebt_ulog.c @@ -0,0 +1,295 @@ +/* + * netfilter module for userspace bridged Ethernet frames logging daemons + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * November, 2004 + * + * Based on ipt_ULOG.c, which is + * (C) 2000-2002 by Harald Welte <laforge@netfilter.org> + * + * This module accepts two parameters: + * + * nlbufsiz: + * The parameter specifies how big the buffer for each netlink multicast + * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will + * get accumulated in the kernel until they are sent to userspace. It is + * NOT possible to allocate more than 128kB, and it is strongly discouraged, + * because atomically allocating 128kB inside the network rx softirq is not + * reliable. Please also keep in mind that this buffer size is allocated for + * each nlgroup you are using, so the total kernel memory usage increases + * by that factor. + * + * flushtimeout: + * Specify, after how many hundredths of a second the queue should be + * flushed even if it is not full yet. + * + */ + +#include <linux/module.h> +#include <linux/config.h> +#include <linux/spinlock.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/netlink.h> +#include <linux/netdevice.h> +#include <linux/module.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ulog.h> +#include <net/sock.h> +#include "../br_private.h" + +#define PRINTR(format, args...) 
do { if (net_ratelimit()) \ + printk(format , ## args); } while (0) + +static unsigned int nlbufsiz = 4096; +module_param(nlbufsiz, uint, 0600); +MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) " + "(defaults to 4096)"); + +static unsigned int flushtimeout = 10; +module_param(flushtimeout, uint, 0600); +MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) " + "(defaults to 10)"); + +typedef struct { + unsigned int qlen; /* number of nlmsgs' in the skb */ + struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ + struct sk_buff *skb; /* the pre-allocated skb */ + struct timer_list timer; /* the timer function */ + spinlock_t lock; /* the per-queue lock */ +} ebt_ulog_buff_t; + +static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; +static struct sock *ebtulognl; + +/* send one ulog_buff_t to userspace */ +static void ulog_send(unsigned int nlgroup) +{ + ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; + + if (timer_pending(&ub->timer)) + del_timer(&ub->timer); + + /* last nlmsg needs NLMSG_DONE */ + if (ub->qlen > 1) + ub->lastnlh->nlmsg_type = NLMSG_DONE; + + NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup; + netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC); + + ub->qlen = 0; + ub->skb = NULL; +} + +/* timer function to flush queue in flushtimeout time */ +static void ulog_timer(unsigned long data) +{ + spin_lock_bh(&ulog_buffers[data].lock); + if (ulog_buffers[data].skb) + ulog_send(data); + spin_unlock_bh(&ulog_buffers[data].lock); +} + +static struct sk_buff *ulog_alloc_skb(unsigned int size) +{ + struct sk_buff *skb; + + skb = alloc_skb(nlbufsiz, GFP_ATOMIC); + if (!skb) { + PRINTR(KERN_ERR "ebt_ulog: can't alloc whole buffer " + "of size %ub!\n", nlbufsiz); + if (size < nlbufsiz) { + /* try to allocate only as much as we need for + * current packet */ + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + PRINTR(KERN_ERR "ebt_ulog: can't even allocate " + "buffer of size %ub\n", size); + } + } 
+ + return skb; +} + +static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, + const struct net_device *in, const struct net_device *out, + const void *data, unsigned int datalen) +{ + ebt_ulog_packet_msg_t *pm; + size_t size, copy_len; + struct nlmsghdr *nlh; + struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + unsigned int group = uloginfo->nlgroup; + ebt_ulog_buff_t *ub = &ulog_buffers[group]; + spinlock_t *lock = &ub->lock; + + if ((uloginfo->cprange == 0) || + (uloginfo->cprange > skb->len + ETH_HLEN)) + copy_len = skb->len + ETH_HLEN; + else + copy_len = uloginfo->cprange; + + size = NLMSG_SPACE(sizeof(*pm) + copy_len); + if (size > nlbufsiz) { + PRINTR("ebt_ulog: Size %Zd needed, but nlbufsiz=%d\n", + size, nlbufsiz); + return; + } + + spin_lock_bh(lock); + + if (!ub->skb) { + if (!(ub->skb = ulog_alloc_skb(size))) + goto alloc_failure; + } else if (size > skb_tailroom(ub->skb)) { + ulog_send(group); + + if (!(ub->skb = ulog_alloc_skb(size))) + goto alloc_failure; + } + + nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, 0, + size - NLMSG_ALIGN(sizeof(*nlh))); + ub->qlen++; + + pm = NLMSG_DATA(nlh); + + /* Fill in the ulog data */ + pm->version = EBT_ULOG_VERSION; + do_gettimeofday(&pm->stamp); + if (ub->qlen == 1) + ub->skb->stamp = pm->stamp; + pm->data_len = copy_len; + pm->mark = skb->nfmark; + pm->hook = hooknr; + if (uloginfo->prefix != NULL) + strcpy(pm->prefix, uloginfo->prefix); + else + *(pm->prefix) = '\0'; + + if (in) { + strcpy(pm->physindev, in->name); + /* If in isn't a bridge, then physindev==indev */ + if (in->br_port) + strcpy(pm->indev, in->br_port->br->dev->name); + else + strcpy(pm->indev, in->name); + } else + pm->indev[0] = pm->physindev[0] = '\0'; + + if (out) { + /* If out exists, then out is a bridge port */ + strcpy(pm->physoutdev, out->name); + strcpy(pm->outdev, out->br_port->br->dev->name); + } else + pm->outdev[0] = pm->physoutdev[0] = '\0'; + + if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) + BUG(); 
+ + if (ub->qlen > 1) + ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; + + ub->lastnlh = nlh; + + if (ub->qlen >= uloginfo->qthreshold) + ulog_send(group); + else if (!timer_pending(&ub->timer)) { + ub->timer.expires = jiffies + flushtimeout * HZ / 100; + add_timer(&ub->timer); + } + +unlock: + spin_unlock_bh(lock); + + return; + +nlmsg_failure: + printk(KERN_CRIT "ebt_ulog: error during NLMSG_PUT. This should " + "not happen, please report to author.\n"); + goto unlock; +alloc_failure: + goto unlock; +} + +static int ebt_ulog_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) || + uloginfo->nlgroup > 31) + return -EINVAL; + + uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; + + if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN) + uloginfo->qthreshold = EBT_ULOG_MAX_QLEN; + + return 0; +} + +static struct ebt_watcher ulog = { + .name = EBT_ULOG_WATCHER, + .watcher = ebt_ulog, + .check = ebt_ulog_check, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + int i, ret = 0; + + if (nlbufsiz >= 128*1024) { + printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," + " please try a smaller nlbufsiz parameter.\n"); + return -EINVAL; + } + + /* initialize ulog_buffers */ + for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { + init_timer(&ulog_buffers[i].timer); + ulog_buffers[i].timer.function = ulog_timer; + ulog_buffers[i].timer.data = i; + spin_lock_init(&ulog_buffers[i].lock); + } + + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + if (!ebtulognl) + ret = -ENOMEM; + else if ((ret = ebt_register_watcher(&ulog))) + sock_release(ebtulognl->sk_socket); + + return ret; +} + +static void __exit fini(void) +{ + ebt_ulog_buff_t *ub; + int i; + + ebt_unregister_watcher(&ulog); + for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { + ub = &ulog_buffers[i]; + if (timer_pending(&ub->timer)) 
+ del_timer(&ub->timer); + spin_lock_bh(&ub->lock); + if (ub->skb) { + kfree_skb(ub->skb); + ub->skb = NULL; + } + spin_unlock_bh(&ub->lock); + } + sock_release(ebtulognl->sk_socket); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); +MODULE_DESCRIPTION("ebtables userspace logging module for bridged Ethernet" + " frames"); diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c new file mode 100644 index 000000000000..db60d734908b --- /dev/null +++ b/net/bridge/netfilter/ebt_vlan.c @@ -0,0 +1,195 @@ +/* + * Description: EBTables 802.1Q match extension kernelspace module. + * Authors: Nick Fedchik <nick@fedchik.org.ua> + * Bart De Schuymer <bdschuym@pandora.be> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_vlan.h> + +static int debug; +#define MODULE_VERS "0.6" + +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages"); +MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>"); +MODULE_DESCRIPTION("802.1Q match module (ebtables extension), v" + MODULE_VERS); +MODULE_LICENSE("GPL"); + + +/* Helper macros. Except for DEBUG_MSG they use a local variable named + * "info" (struct ebt_vlan_info *) from the calling function. + * Statement macros are wrapped in do { } while (0) so they behave as a + * single statement; expression macros are fully parenthesized. */ +/* Print a KERN_DEBUG message, only when the "debug" parameter is non-zero */ +#define DEBUG_MSG(args...) do { if (debug) printk (KERN_DEBUG "ebt_vlan: " args); } while (0) +/* "!" when the given inversion flag is set in info->invflags, else "" */ +#define INV_FLAG(_inv_flag_) ((info->invflags & (_inv_flag_)) ? "!" : "") +/* Non-zero when the given option bit is set in info->bitmask */ +#define GET_BITMASK(_BIT_MASK_) (info->bitmask & (_BIT_MASK_)) +#define SET_BITMASK(_BIT_MASK_) (info->bitmask |= (_BIT_MASK_)) +/* Return EBT_NOMATCH from the caller unless (info->X == local X) differs + * from the inversion flag, i.e. unless the (possibly inverted) test holds */ +#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) do { if (!((info->_MATCH_ == (_MATCH_))^!!(info->invflags & (_MASK_)))) return EBT_NOMATCH; } while (0) + +static int +ebt_filter_vlan(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *data, unsigned int datalen) +{ + struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; + struct vlan_hdr _frame, *fp; + + unsigned short TCI; /* Whole TCI, given from parsed frame */ + unsigned short id; /* VLAN ID, given from frame TCI */ + unsigned char prio; /* user_priority, given from frame TCI */ + /* VLAN encapsulated Type/Length field, given from orig frame */ + unsigned short encap; + + fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); + if (fp == NULL) + return EBT_NOMATCH; + + /* Tag Control Information (TCI) consists of the following elements: + * - User_priority. The user_priority field is three bits in length, + * interpreted as a binary number. + * - Canonical Format Indicator (CFI). 
The Canonical Format Indicator + * (CFI) is a single bit flag value. Currently ignored. + * - VLAN Identifier (VID). The VID is encoded as + * an unsigned binary number. */ + TCI = ntohs(fp->h_vlan_TCI); + id = TCI & VLAN_VID_MASK; + prio = (TCI >> 13) & 0x7; + encap = fp->h_vlan_encapsulated_proto; + + /* Checking VLAN Identifier (VID) */ + if (GET_BITMASK(EBT_VLAN_ID)) + EXIT_ON_MISMATCH(id, EBT_VLAN_ID); + + /* Checking user_priority */ + if (GET_BITMASK(EBT_VLAN_PRIO)) + EXIT_ON_MISMATCH(prio, EBT_VLAN_PRIO); + + /* Checking Encapsulated Proto (Length/Type) field */ + if (GET_BITMASK(EBT_VLAN_ENCAP)) + EXIT_ON_MISMATCH(encap, EBT_VLAN_ENCAP); + + return EBT_MATCH; +} + +static int +ebt_check_vlan(const char *tablename, + unsigned int hooknr, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; + + /* Parameters buffer overflow check */ + if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) { + DEBUG_MSG + ("passed size %d is not eq to ebt_vlan_info (%Zd)\n", + datalen, sizeof(struct ebt_vlan_info)); + return -EINVAL; + } + + /* Is it 802.1Q frame checked? */ + if (e->ethproto != htons(ETH_P_8021Q)) { + DEBUG_MSG + ("passed entry proto %2.4X is not 802.1Q (8100)\n", + (unsigned short) ntohs(e->ethproto)); + return -EINVAL; + } + + /* Check for bitmask range + * True if even one bit is out of mask */ + if (info->bitmask & ~EBT_VLAN_MASK) { + DEBUG_MSG("bitmask %2X is out of mask (%2X)\n", + info->bitmask, EBT_VLAN_MASK); + return -EINVAL; + } + + /* Check for inversion flags range */ + if (info->invflags & ~EBT_VLAN_MASK) { + DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n", + info->invflags, EBT_VLAN_MASK); + return -EINVAL; + } + + /* Reserved VLAN ID (VID) values + * ----------------------------- + * 0 - The null VLAN ID. + * 1 - The default Port VID (PVID) + * 0x0FFF - Reserved for implementation use. + * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. 
*/ + if (GET_BITMASK(EBT_VLAN_ID)) { + if (!!info->id) { /* if id!=0 => check vid range */ + /* VLAN_GROUP_ARRAY_LEN is a count, so the largest usable id is one less */ + if (info->id >= VLAN_GROUP_ARRAY_LEN) { + DEBUG_MSG + ("id %d is out of range (1-4095)\n", + info->id); + return -EINVAL; + } + /* Note: This is valid VLAN-tagged frame point. + * Any value of user_priority are acceptable, + * but should be ignored according to 802.1Q Std. + * So we just drop the prio flag. */ + info->bitmask &= ~EBT_VLAN_PRIO; + } + /* Else, id=0 (null VLAN ID) => user_priority range (any?) */ + } + + if (GET_BITMASK(EBT_VLAN_PRIO)) { + if ((unsigned char) info->prio > 7) { + DEBUG_MSG("prio %d is out of range (0-7)\n", + info->prio); + return -EINVAL; + } + } + /* Check for encapsulated proto range - it is possible to be + * any value for u_short range. + * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */ + if (GET_BITMASK(EBT_VLAN_ENCAP)) { + if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { + DEBUG_MSG + ("encap frame length %d is less than minimal\n", + ntohs(info->encap)); + return -EINVAL; + } + } + + return 0; +} + +static struct ebt_match filter_vlan = { + .name = EBT_VLAN_MATCH, + .match = ebt_filter_vlan, + .check = ebt_check_vlan, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + DEBUG_MSG("ebtables 802.1Q extension module v" + MODULE_VERS "\n"); + DEBUG_MSG("module debug=%d\n", !!debug); + return ebt_register_match(&filter_vlan); +} + +static void __exit fini(void) +{ + ebt_unregister_match(&filter_vlan); +} + +module_init(init); +module_exit(fini); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c new file mode 100644 index 000000000000..1767c94cd3de --- /dev/null +++ b/net/bridge/netfilter/ebtable_broute.c @@ -0,0 +1,86 @@ +/* + * ebtable_broute + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + * This table lets you choose between routing and bridging for frames + * entering on a bridge enslaved nic. 
This table is traversed before any + * other ebtables table. See net/bridge/br_input.c. + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/module.h> +#include <linux/if_bridge.h> + +/* EBT_ACCEPT means the frame will be bridged + * EBT_DROP means the frame will be routed + */ +static struct ebt_entries initial_chain = { + .name = "BROUTING", + .policy = EBT_ACCEPT, +}; + +static struct ebt_replace initial_table = +{ + .name = "broute", + .valid_hooks = 1 << NF_BR_BROUTING, + .entries_size = sizeof(struct ebt_entries), + .hook_entry = { + [NF_BR_BROUTING] = &initial_chain, + }, + .entries = (char *)&initial_chain, +}; + +static int check(const struct ebt_table_info *info, unsigned int valid_hooks) +{ + if (valid_hooks & ~(1 << NF_BR_BROUTING)) + return -EINVAL; + return 0; +} + +static struct ebt_table broute_table = +{ + .name = "broute", + .table = &initial_table, + .valid_hooks = 1 << NF_BR_BROUTING, + .lock = RW_LOCK_UNLOCKED, + .check = check, + .me = THIS_MODULE, +}; + +static int ebt_broute(struct sk_buff **pskb) +{ + int ret; + + ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL, + &broute_table); + if (ret == NF_DROP) + return 1; /* route it */ + return 0; /* bridge it */ +} + +static int __init init(void) +{ + int ret; + + ret = ebt_register_table(&broute_table); + if (ret < 0) + return ret; + /* see br_input.c */ + br_should_route_hook = ebt_broute; + return ret; +} + +static void __exit fini(void) +{ + br_should_route_hook = NULL; + synchronize_net(); + ebt_unregister_table(&broute_table); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c new file mode 100644 index 000000000000..c18666e0392b --- /dev/null +++ b/net/bridge/netfilter/ebtable_filter.c @@ -0,0 +1,123 @@ +/* + * ebtable_filter + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + */ + +#include 
<linux/netfilter_bridge/ebtables.h> +#include <linux/module.h> + +#define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \ + (1 << NF_BR_LOCAL_OUT)) + +static struct ebt_entries initial_chains[] = +{ + { + .name = "INPUT", + .policy = EBT_ACCEPT, + }, + { + .name = "FORWARD", + .policy = EBT_ACCEPT, + }, + { + .name = "OUTPUT", + .policy = EBT_ACCEPT, + }, +}; + +static struct ebt_replace initial_table = +{ + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .entries_size = 3 * sizeof(struct ebt_entries), + .hook_entry = { + [NF_BR_LOCAL_IN] = &initial_chains[0], + [NF_BR_FORWARD] = &initial_chains[1], + [NF_BR_LOCAL_OUT] = &initial_chains[2], + }, + .entries = (char *)initial_chains, +}; + +static int check(const struct ebt_table_info *info, unsigned int valid_hooks) +{ + if (valid_hooks & ~FILTER_VALID_HOOKS) + return -EINVAL; + return 0; +} + +static struct ebt_table frame_filter = +{ + .name = "filter", + .table = &initial_table, + .valid_hooks = FILTER_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, + .check = check, + .me = THIS_MODULE, +}; + +static unsigned int +ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, + const struct net_device *out, int (*okfn)(struct sk_buff *)) +{ + return ebt_do_table(hook, pskb, in, out, &frame_filter); +} + +static struct nf_hook_ops ebt_ops_filter[] = { + { + .hook = ebt_hook, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_FILTER_BRIDGED, + }, + { + .hook = ebt_hook, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_FILTER_BRIDGED, + }, + { + .hook = ebt_hook, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_LOCAL_OUT, + .priority = NF_BR_PRI_FILTER_OTHER, + }, +}; + +static int __init init(void) +{ + int i, j, ret; + + ret = ebt_register_table(&frame_filter); + if (ret < 0) + return ret; + for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) + if ((ret = 
nf_register_hook(&ebt_ops_filter[i])) < 0) + goto cleanup; + return ret; +cleanup: + for (j = 0; j < i; j++) + nf_unregister_hook(&ebt_ops_filter[j]); + ebt_unregister_table(&frame_filter); + return ret; +} + +static void __exit fini(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) + nf_unregister_hook(&ebt_ops_filter[i]); + ebt_unregister_table(&frame_filter); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c new file mode 100644 index 000000000000..828cac2cc4a3 --- /dev/null +++ b/net/bridge/netfilter/ebtable_nat.c @@ -0,0 +1,130 @@ +/* + * ebtable_nat + * + * Authors: + * Bart De Schuymer <bdschuym@pandora.be> + * + * April, 2002 + * + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/module.h> + +#define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ + (1 << NF_BR_POST_ROUTING)) + +static struct ebt_entries initial_chains[] = +{ + { + .name = "PREROUTING", + .policy = EBT_ACCEPT, + }, + { + .name = "OUTPUT", + .policy = EBT_ACCEPT, + }, + { + .name = "POSTROUTING", + .policy = EBT_ACCEPT, + } +}; + +static struct ebt_replace initial_table = +{ + .name = "nat", + .valid_hooks = NAT_VALID_HOOKS, + .entries_size = 3 * sizeof(struct ebt_entries), + .hook_entry = { + [NF_BR_PRE_ROUTING] = &initial_chains[0], + [NF_BR_LOCAL_OUT] = &initial_chains[1], + [NF_BR_POST_ROUTING] = &initial_chains[2], + }, + .entries = (char *)initial_chains, +}; + +static int check(const struct ebt_table_info *info, unsigned int valid_hooks) +{ + if (valid_hooks & ~NAT_VALID_HOOKS) + return -EINVAL; + return 0; +} + +static struct ebt_table frame_nat = +{ + .name = "nat", + .table = &initial_table, + .valid_hooks = NAT_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, + .check = check, + .me = THIS_MODULE, +}; + +static unsigned int +ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in + , const struct 
net_device *out, int (*okfn)(struct sk_buff *)) +{ + return ebt_do_table(hook, pskb, in, out, &frame_nat); +} + +static unsigned int +ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in + , const struct net_device *out, int (*okfn)(struct sk_buff *)) +{ + return ebt_do_table(hook, pskb, in, out, &frame_nat); +} + +static struct nf_hook_ops ebt_ops_nat[] = { + { + .hook = ebt_nat_dst, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_LOCAL_OUT, + .priority = NF_BR_PRI_NAT_DST_OTHER, + }, + { + .hook = ebt_nat_src, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_BR_PRI_NAT_SRC, + }, + { + .hook = ebt_nat_dst, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_NAT_DST_BRIDGED, + }, +}; + +static int __init init(void) +{ + int i, ret, j; + + ret = ebt_register_table(&frame_nat); + if (ret < 0) + return ret; + for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) + if ((ret = nf_register_hook(&ebt_ops_nat[i])) < 0) + goto cleanup; + return ret; +cleanup: + for (j = 0; j < i; j++) + nf_unregister_hook(&ebt_ops_nat[j]); + ebt_unregister_table(&frame_nat); + return ret; +} + +static void __exit fini(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) + nf_unregister_hook(&ebt_ops_nat[i]); + ebt_unregister_table(&frame_nat); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c new file mode 100644 index 000000000000..18ebc664769b --- /dev/null +++ b/net/bridge/netfilter/ebtables.c @@ -0,0 +1,1507 @@ +/* + * ebtables + * + * Author: + * Bart De Schuymer <bdschuym@pandora.be> + * + * ebtables.c,v 2.0, July, 2002 + * + * This code is stongly inspired on the iptables code which is + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. 
Neuling + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* used for print_string */ +#include <linux/sched.h> +#include <linux/tty.h> + +#include <linux/kmod.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/spinlock.h> +#include <asm/uaccess.h> +#include <linux/smp.h> +#include <net/sock.h> +/* needed for logical [in,out]-dev filtering */ +#include "../br_private.h" + +/* list_named_find */ +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) +#include <linux/netfilter_ipv4/listhelp.h> + +#if 0 +/* use this for remote debugging + * Copyright (C) 1998 by Ori Pomerantz + * Print the string to the appropriate tty, the one + * the current task uses + */ +static void print_string(char *str) +{ + struct tty_struct *my_tty; + + /* The tty for the current task */ + my_tty = current->signal->tty; + if (my_tty != NULL) { + my_tty->driver->write(my_tty, 0, str, strlen(str)); + my_tty->driver->write(my_tty, 0, "\015\012", 2); + } +} + +#define BUGPRINT(args) print_string(args); +#else +#define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ + "report to author: "format, ## args) +/* #define BUGPRINT(format, args...) */ +#endif +#define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ + ": out of memory: "format, ## args) +/* #define MEMPRINT(format, args...) 
*/ + + + +/* + * Each cpu has its own set of counters, so there is no need for write_lock in + * the softirq + * For reading or updating the counters, the user context needs to + * get a write_lock + */ + +/* The size of each set of counters is altered to get cache alignment */ +#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) +#define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter))) +#define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \ + COUNTER_OFFSET(n) * cpu)) + + + +static DECLARE_MUTEX(ebt_mutex); +static LIST_HEAD(ebt_tables); +static LIST_HEAD(ebt_targets); +static LIST_HEAD(ebt_matches); +static LIST_HEAD(ebt_watchers); + +static struct ebt_target ebt_standard_target = +{ {NULL, NULL}, EBT_STANDARD_TARGET, NULL, NULL, NULL, NULL}; + +static inline int ebt_do_watcher (struct ebt_entry_watcher *w, + const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, + const struct net_device *out) +{ + w->u.watcher->watcher(skb, hooknr, in, out, w->data, + w->watcher_size); + /* watchers don't give a verdict */ + return 0; +} + +static inline int ebt_do_match (struct ebt_entry_match *m, + const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out) +{ + return m->u.match->match(skb, in, out, m->data, + m->match_size); +} + +static inline int ebt_dev_check(char *entry, const struct net_device *device) +{ + int i = 0; + char *devname = device->name; + + if (*entry == '\0') + return 0; + if (!device) + return 1; + /* 1 is the wildcard token */ + while (entry[i] != '\0' && entry[i] != 1 && entry[i] == devname[i]) + i++; + return (devname[i] != entry[i] && entry[i] != 1); +} + +#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg)) +/* process standard matches */ +static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, + const struct net_device *in, const struct net_device *out) +{ + int verdict, i; + + if (e->bitmask & EBT_802_3) { + if 
(FWINV2(ntohs(h->h_proto) >= 1536, EBT_IPROTO)) + return 1; + } else if (!(e->bitmask & EBT_NOPROTO) && + FWINV2(e->ethproto != h->h_proto, EBT_IPROTO)) + return 1; + + if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN)) + return 1; + if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) + return 1; + if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check( + e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN)) + return 1; + if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check( + e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT)) + return 1; + + if (e->bitmask & EBT_SOURCEMAC) { + verdict = 0; + for (i = 0; i < 6; i++) + verdict |= (h->h_source[i] ^ e->sourcemac[i]) & + e->sourcemsk[i]; + if (FWINV2(verdict != 0, EBT_ISOURCE) ) + return 1; + } + if (e->bitmask & EBT_DESTMAC) { + verdict = 0; + for (i = 0; i < 6; i++) + verdict |= (h->h_dest[i] ^ e->destmac[i]) & + e->destmsk[i]; + if (FWINV2(verdict != 0, EBT_IDEST) ) + return 1; + } + return 0; +} + +/* Do some firewalling */ +unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + struct ebt_table *table) +{ + int i, nentries; + struct ebt_entry *point; + struct ebt_counter *counter_base, *cb_base; + struct ebt_entry_target *t; + int verdict, sp = 0; + struct ebt_chainstack *cs; + struct ebt_entries *chaininfo; + char *base; + struct ebt_table_info *private; + + read_lock_bh(&table->lock); + private = table->private; + cb_base = COUNTER_BASE(private->counters, private->nentries, + smp_processor_id()); + if (private->chainstack) + cs = private->chainstack[smp_processor_id()]; + else + cs = NULL; + chaininfo = private->hook_entry[hook]; + nentries = private->hook_entry[hook]->nentries; + point = (struct ebt_entry *)(private->hook_entry[hook]->data); + counter_base = cb_base + private->hook_entry[hook]->counter_offset; + /* base for chain jumps */ + base = private->entries; + i = 0; + while (i < nentries) { + if (ebt_basic_match(point, 
eth_hdr(*pskb), in, out)) + goto letscontinue; + + if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0) + goto letscontinue; + + /* increase counter */ + (*(counter_base + i)).pcnt++; + (*(counter_base + i)).bcnt+=(**pskb).len; + + /* these should only watch: not modify, nor tell us + what to do with the packet */ + EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in, + out); + + t = (struct ebt_entry_target *) + (((char *)point) + point->target_offset); + /* standard target */ + if (!t->u.target->target) + verdict = ((struct ebt_standard_target *)t)->verdict; + else + verdict = t->u.target->target(pskb, hook, + in, out, t->data, t->target_size); + if (verdict == EBT_ACCEPT) { + read_unlock_bh(&table->lock); + return NF_ACCEPT; + } + if (verdict == EBT_DROP) { + read_unlock_bh(&table->lock); + return NF_DROP; + } + if (verdict == EBT_RETURN) { +letsreturn: +#ifdef CONFIG_NETFILTER_DEBUG + if (sp == 0) { + BUGPRINT("RETURN on base chain"); + /* act like this is EBT_CONTINUE */ + goto letscontinue; + } +#endif + sp--; + /* put all the local variables right */ + i = cs[sp].n; + chaininfo = cs[sp].chaininfo; + nentries = chaininfo->nentries; + point = cs[sp].e; + counter_base = cb_base + + chaininfo->counter_offset; + continue; + } + if (verdict == EBT_CONTINUE) + goto letscontinue; +#ifdef CONFIG_NETFILTER_DEBUG + if (verdict < 0) { + BUGPRINT("bogus standard verdict\n"); + read_unlock_bh(&table->lock); + return NF_DROP; + } +#endif + /* jump to a udc */ + cs[sp].n = i + 1; + cs[sp].chaininfo = chaininfo; + cs[sp].e = (struct ebt_entry *) + (((char *)point) + point->next_offset); + i = 0; + chaininfo = (struct ebt_entries *) (base + verdict); +#ifdef CONFIG_NETFILTER_DEBUG + if (chaininfo->distinguisher) { + BUGPRINT("jump to non-chain\n"); + read_unlock_bh(&table->lock); + return NF_DROP; + } +#endif + nentries = chaininfo->nentries; + point = (struct ebt_entry *)chaininfo->data; + counter_base = cb_base + chaininfo->counter_offset; + sp++; + 
continue; +letscontinue: + point = (struct ebt_entry *) + (((char *)point) + point->next_offset); + i++; + } + + /* I actually like this :) */ + if (chaininfo->policy == EBT_RETURN) + goto letsreturn; + if (chaininfo->policy == EBT_ACCEPT) { + read_unlock_bh(&table->lock); + return NF_ACCEPT; + } + read_unlock_bh(&table->lock); + return NF_DROP; +} + +/* If it succeeds, returns element and locks mutex */ +static inline void * +find_inlist_lock_noload(struct list_head *head, const char *name, int *error, + struct semaphore *mutex) +{ + void *ret; + + *error = down_interruptible(mutex); + if (*error != 0) + return NULL; + + ret = list_named_find(head, name); + if (!ret) { + *error = -ENOENT; + up(mutex); + } + return ret; +} + +#ifndef CONFIG_KMOD +#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) +#else +static void * +find_inlist_lock(struct list_head *head, const char *name, const char *prefix, + int *error, struct semaphore *mutex) +{ + void *ret; + + ret = find_inlist_lock_noload(head, name, error, mutex); + if (!ret) { + request_module("%s%s", prefix, name); + ret = find_inlist_lock_noload(head, name, error, mutex); + } + return ret; +} +#endif + +static inline struct ebt_table * +find_table_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex); +} + +static inline struct ebt_match * +find_match_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ebt_matches, name, "ebt_", error, mutex); +} + +static inline struct ebt_watcher * +find_watcher_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ebt_watchers, name, "ebt_", error, mutex); +} + +static inline struct ebt_target * +find_target_lock(const char *name, int *error, struct semaphore *mutex) +{ + return find_inlist_lock(&ebt_targets, name, "ebt_", error, mutex); +} + +static inline int +ebt_check_match(struct ebt_entry_match 
*m, struct ebt_entry *e, + const char *name, unsigned int hookmask, unsigned int *cnt) +{ + struct ebt_match *match; + int ret; + + if (((char *)m) + m->match_size + sizeof(struct ebt_entry_match) > + ((char *)e) + e->watchers_offset) + return -EINVAL; + match = find_match_lock(m->u.name, &ret, &ebt_mutex); + if (!match) + return ret; + m->u.match = match; + if (!try_module_get(match->me)) { + up(&ebt_mutex); + return -ENOENT; + } + up(&ebt_mutex); + if (match->check && + match->check(name, hookmask, e, m->data, m->match_size) != 0) { + BUGPRINT("match->check failed\n"); + module_put(match->me); + return -EINVAL; + } + (*cnt)++; + return 0; +} + +static inline int +ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e, + const char *name, unsigned int hookmask, unsigned int *cnt) +{ + struct ebt_watcher *watcher; + int ret; + + if (((char *)w) + w->watcher_size + sizeof(struct ebt_entry_watcher) > + ((char *)e) + e->target_offset) + return -EINVAL; + watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex); + if (!watcher) + return ret; + w->u.watcher = watcher; + if (!try_module_get(watcher->me)) { + up(&ebt_mutex); + return -ENOENT; + } + up(&ebt_mutex); + if (watcher->check && + watcher->check(name, hookmask, e, w->data, w->watcher_size) != 0) { + BUGPRINT("watcher->check failed\n"); + module_put(watcher->me); + return -EINVAL; + } + (*cnt)++; + return 0; +} + +/* + * this one is very careful, as it is the first function + * to parse the userspace data + */ +static inline int +ebt_check_entry_size_and_hooks(struct ebt_entry *e, + struct ebt_table_info *newinfo, char *base, char *limit, + struct ebt_entries **hook_entries, unsigned int *n, unsigned int *cnt, + unsigned int *totalcnt, unsigned int *udc_cnt, unsigned int valid_hooks) +{ + int i; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((valid_hooks & (1 << i)) == 0) + continue; + if ( (char *)hook_entries[i] - base == + (char *)e - newinfo->entries) + break; + } + /* beginning of a new chain + 
if i == NF_BR_NUMHOOKS it must be a user defined chain */ + if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { + if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) != 0) { + /* we make userspace set this right, + so there is no misunderstanding */ + BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " + "in distinguisher\n"); + return -EINVAL; + } + /* this checks if the previous chain has as many entries + as it said it has */ + if (*n != *cnt) { + BUGPRINT("nentries does not equal the nr of entries " + "in the chain\n"); + return -EINVAL; + } + /* before we look at the struct, be sure it is not too big */ + if ((char *)hook_entries[i] + sizeof(struct ebt_entries) + > limit) { + BUGPRINT("entries_size too small\n"); + return -EINVAL; + } + if (((struct ebt_entries *)e)->policy != EBT_DROP && + ((struct ebt_entries *)e)->policy != EBT_ACCEPT) { + /* only RETURN from udc */ + if (i != NF_BR_NUMHOOKS || + ((struct ebt_entries *)e)->policy != EBT_RETURN) { + BUGPRINT("bad policy\n"); + return -EINVAL; + } + } + if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */ + (*udc_cnt)++; + else + newinfo->hook_entry[i] = (struct ebt_entries *)e; + if (((struct ebt_entries *)e)->counter_offset != *totalcnt) { + BUGPRINT("counter_offset != totalcnt"); + return -EINVAL; + } + *n = ((struct ebt_entries *)e)->nentries; + *cnt = 0; + return 0; + } + /* a plain old entry, heh */ + if (sizeof(struct ebt_entry) > e->watchers_offset || + e->watchers_offset > e->target_offset || + e->target_offset >= e->next_offset) { + BUGPRINT("entry offsets not in right order\n"); + return -EINVAL; + } + /* this is not checked anywhere else */ + if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) { + BUGPRINT("target size too small\n"); + return -EINVAL; + } + + (*cnt)++; + (*totalcnt)++; + return 0; +} + +struct ebt_cl_stack +{ + struct ebt_chainstack cs; + int from; + unsigned int hookmask; +}; + +/* + * we need these positions to check that the jumps to a different part 
of the + * entries is a jump to the beginning of a new chain. + */ +static inline int +ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, + struct ebt_entries **hook_entries, unsigned int *n, unsigned int valid_hooks, + struct ebt_cl_stack *udc) +{ + int i; + + /* we're only interested in chain starts */ + if (e->bitmask & EBT_ENTRY_OR_ENTRIES) + return 0; + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((valid_hooks & (1 << i)) == 0) + continue; + if (newinfo->hook_entry[i] == (struct ebt_entries *)e) + break; + } + /* only care about udc */ + if (i != NF_BR_NUMHOOKS) + return 0; + + udc[*n].cs.chaininfo = (struct ebt_entries *)e; + /* these initialisations are depended on later in check_chainloops() */ + udc[*n].cs.n = 0; + udc[*n].hookmask = 0; + + (*n)++; + return 0; +} + +static inline int +ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) +{ + if (i && (*i)-- == 0) + return 1; + if (m->u.match->destroy) + m->u.match->destroy(m->data, m->match_size); + module_put(m->u.match->me); + + return 0; +} + +static inline int +ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) +{ + if (i && (*i)-- == 0) + return 1; + if (w->u.watcher->destroy) + w->u.watcher->destroy(w->data, w->watcher_size); + module_put(w->u.watcher->me); + + return 0; +} + +static inline int +ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) +{ + struct ebt_entry_target *t; + + if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + return 0; + /* we're done */ + if (cnt && (*cnt)-- == 0) + return 1; + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); + t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + if (t->u.target->destroy) + t->u.target->destroy(t->data, t->target_size); + module_put(t->u.target->me); + + return 0; +} + +static inline int +ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, + const char *name, unsigned int *cnt, unsigned int valid_hooks, + struct 
ebt_cl_stack *cl_s, unsigned int udc_cnt) +{ + struct ebt_entry_target *t; + struct ebt_target *target; + unsigned int i, j, hook = 0, hookmask = 0; + int ret; + + /* don't mess with the struct ebt_entries */ + if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + return 0; + + if (e->bitmask & ~EBT_F_MASK) { + BUGPRINT("Unknown flag for bitmask\n"); + return -EINVAL; + } + if (e->invflags & ~EBT_INV_MASK) { + BUGPRINT("Unknown flag for inv bitmask\n"); + return -EINVAL; + } + if ( (e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3) ) { + BUGPRINT("NOPROTO & 802_3 not allowed\n"); + return -EINVAL; + } + /* what hook do we belong to? */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((valid_hooks & (1 << i)) == 0) + continue; + if ((char *)newinfo->hook_entry[i] < (char *)e) + hook = i; + else + break; + } + /* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on + a base chain */ + if (i < NF_BR_NUMHOOKS) + hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); + else { + for (i = 0; i < udc_cnt; i++) + if ((char *)(cl_s[i].cs.chaininfo) > (char *)e) + break; + if (i == 0) + hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS); + else + hookmask = cl_s[i - 1].hookmask; + } + i = 0; + ret = EBT_MATCH_ITERATE(e, ebt_check_match, e, name, hookmask, &i); + if (ret != 0) + goto cleanup_matches; + j = 0; + ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, e, name, hookmask, &j); + if (ret != 0) + goto cleanup_watchers; + t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + target = find_target_lock(t->u.name, &ret, &ebt_mutex); + if (!target) + goto cleanup_watchers; + if (!try_module_get(target->me)) { + up(&ebt_mutex); + ret = -ENOENT; + goto cleanup_watchers; + } + up(&ebt_mutex); + + t->u.target = target; + if (t->u.target == &ebt_standard_target) { + if (e->target_offset + sizeof(struct ebt_standard_target) > + e->next_offset) { + BUGPRINT("Standard target size too big\n"); + ret = -EFAULT; + goto cleanup_watchers; + } + if (((struct ebt_standard_target 
*)t)->verdict < + -NUM_STANDARD_TARGETS) { + BUGPRINT("Invalid standard target\n"); + ret = -EFAULT; + goto cleanup_watchers; + } + } else if ((e->target_offset + t->target_size + + sizeof(struct ebt_entry_target) > e->next_offset) || + (t->u.target->check && + t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){ + module_put(t->u.target->me); + ret = -EFAULT; + goto cleanup_watchers; + } + (*cnt)++; + return 0; +cleanup_watchers: + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); +cleanup_matches: + EBT_MATCH_ITERATE(e, ebt_cleanup_match, &i); + return ret; +} + +/* + * checks for loops and sets the hook mask for udc + * the hook mask for udc tells us from which base chains the udc can be + * accessed. This mask is a parameter to the check() functions of the extensions + */ +static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s, + unsigned int udc_cnt, unsigned int hooknr, char *base) +{ + int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; + struct ebt_entry *e = (struct ebt_entry *)chain->data; + struct ebt_entry_target *t; + + while (pos < nentries || chain_nr != -1) { + /* end of udc, go back one 'recursion' step */ + if (pos == nentries) { + /* put back values of the time when this chain was called */ + e = cl_s[chain_nr].cs.e; + if (cl_s[chain_nr].from != -1) + nentries = + cl_s[cl_s[chain_nr].from].cs.chaininfo->nentries; + else + nentries = chain->nentries; + pos = cl_s[chain_nr].cs.n; + /* make sure we won't see a loop that isn't one */ + cl_s[chain_nr].cs.n = 0; + chain_nr = cl_s[chain_nr].from; + if (pos == nentries) + continue; + } + t = (struct ebt_entry_target *) + (((char *)e) + e->target_offset); + if (strcmp(t->u.name, EBT_STANDARD_TARGET)) + goto letscontinue; + if (e->target_offset + sizeof(struct ebt_standard_target) > + e->next_offset) { + BUGPRINT("Standard target size too big\n"); + return -1; + } + verdict = ((struct ebt_standard_target *)t)->verdict; + if (verdict >= 0) { /* jump 
to another chain */ + struct ebt_entries *hlp2 = + (struct ebt_entries *)(base + verdict); + for (i = 0; i < udc_cnt; i++) + if (hlp2 == cl_s[i].cs.chaininfo) + break; + /* bad destination or loop */ + if (i == udc_cnt) { + BUGPRINT("bad destination\n"); + return -1; + } + if (cl_s[i].cs.n) { + BUGPRINT("loop\n"); + return -1; + } + /* this can't be 0, so the above test is correct */ + cl_s[i].cs.n = pos + 1; + pos = 0; + cl_s[i].cs.e = ((void *)e + e->next_offset); + e = (struct ebt_entry *)(hlp2->data); + nentries = hlp2->nentries; + cl_s[i].from = chain_nr; + chain_nr = i; + /* this udc is accessible from the base chain for hooknr */ + cl_s[i].hookmask |= (1 << hooknr); + continue; + } +letscontinue: + e = (void *)e + e->next_offset; + pos++; + } + return 0; +} + +/* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ +static int translate_table(struct ebt_replace *repl, + struct ebt_table_info *newinfo) +{ + unsigned int i, j, k, udc_cnt; + int ret; + struct ebt_cl_stack *cl_s = NULL; /* used in the checking for chain loops */ + + i = 0; + while (i < NF_BR_NUMHOOKS && !(repl->valid_hooks & (1 << i))) + i++; + if (i == NF_BR_NUMHOOKS) { + BUGPRINT("No valid hooks specified\n"); + return -EINVAL; + } + if (repl->hook_entry[i] != (struct ebt_entries *)repl->entries) { + BUGPRINT("Chains don't start at beginning\n"); + return -EINVAL; + } + /* make sure chains are ordered after each other in same order + as their corresponding hooks */ + for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { + if (!(repl->valid_hooks & (1 << j))) + continue; + if ( repl->hook_entry[j] <= repl->hook_entry[i] ) { + BUGPRINT("Hook order must be followed\n"); + return -EINVAL; + } + i = j; + } + + for (i = 0; i < NF_BR_NUMHOOKS; i++) + newinfo->hook_entry[i] = NULL; + + newinfo->entries_size = repl->entries_size; + newinfo->nentries = repl->nentries; + + /* do some early checkings and initialize some things */ + i = 0; /* holds the expected nr. 
of entries for the chain */ + j = 0; /* holds the up to now counted entries for the chain */ + k = 0; /* holds the total nr. of entries, should equal + newinfo->nentries afterwards */ + udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */ + ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_check_entry_size_and_hooks, newinfo, repl->entries, + repl->entries + repl->entries_size, repl->hook_entry, &i, &j, &k, + &udc_cnt, repl->valid_hooks); + + if (ret != 0) + return ret; + + if (i != j) { + BUGPRINT("nentries does not equal the nr of entries in the " + "(last) chain\n"); + return -EINVAL; + } + if (k != newinfo->nentries) { + BUGPRINT("Total nentries is wrong\n"); + return -EINVAL; + } + + /* check if all valid hooks have a chain */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if (newinfo->hook_entry[i] == NULL && + (repl->valid_hooks & (1 << i))) { + BUGPRINT("Valid hook without chain\n"); + return -EINVAL; + } + } + + /* get the location of the udc, put them in an array + while we're at it, allocate the chainstack */ + if (udc_cnt) { + /* this will get free'd in do_replace()/ebt_register_table() + if an error occurs */ + newinfo->chainstack = (struct ebt_chainstack **) + vmalloc(num_possible_cpus() * sizeof(struct ebt_chainstack)); + if (!newinfo->chainstack) + return -ENOMEM; + for (i = 0; i < num_possible_cpus(); i++) { + newinfo->chainstack[i] = + vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); + if (!newinfo->chainstack[i]) { + while (i) + vfree(newinfo->chainstack[--i]); + vfree(newinfo->chainstack); + newinfo->chainstack = NULL; + return -ENOMEM; + } + } + + cl_s = (struct ebt_cl_stack *) + vmalloc(udc_cnt * sizeof(struct ebt_cl_stack)); + if (!cl_s) + return -ENOMEM; + i = 0; /* the i'th udc */ + EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_get_udc_positions, newinfo, repl->hook_entry, &i, + repl->valid_hooks, cl_s); + /* sanity check */ + if (i != udc_cnt) { + BUGPRINT("i != udc_cnt\n"); + vfree(cl_s); 
+ return -EFAULT; + } + } + + /* Check for loops */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) + if (repl->valid_hooks & (1 << i)) + if (check_chainloops(newinfo->hook_entry[i], + cl_s, udc_cnt, i, newinfo->entries)) { + if (cl_s) + vfree(cl_s); + return -EINVAL; + } + + /* we now know the following (along with E=mc): + - the nr of entries in each chain is right + - the size of the allocated space is right + - all valid hooks have a corresponding chain + - there are no loops + - wrong data can still be on the level of a single entry + - could be there are jumps to places that are not the + beginning of a chain. This can only occur in chains that + are not accessible from any base chains, so we don't care. */ + + /* used to know what we need to clean up if something goes wrong */ + i = 0; + ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_check_entry, newinfo, repl->name, &i, repl->valid_hooks, + cl_s, udc_cnt); + if (ret != 0) { + EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_cleanup_entry, &i); + } + if (cl_s) + vfree(cl_s); + return ret; +} + +/* called under write_lock */ +static void get_counters(struct ebt_counter *oldcounters, + struct ebt_counter *counters, unsigned int nentries) +{ + int i, cpu; + struct ebt_counter *counter_base; + + /* counters of cpu 0 */ + memcpy(counters, oldcounters, + sizeof(struct ebt_counter) * nentries); + /* add other counters to those of cpu 0 */ + for (cpu = 1; cpu < num_possible_cpus(); cpu++) { + counter_base = COUNTER_BASE(oldcounters, nentries, cpu); + for (i = 0; i < nentries; i++) { + counters[i].pcnt += counter_base[i].pcnt; + counters[i].bcnt += counter_base[i].bcnt; + } + } +} + +/* replace the table */ +static int do_replace(void __user *user, unsigned int len) +{ + int ret, i, countersize; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + struct ebt_table *t; + struct ebt_counter *counterstmp = NULL; + /* used to be able to unlock earlier */ + struct ebt_table_info 
*table; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.entries_size) { + BUGPRINT("Wrong len argument\n"); + return -EINVAL; + } + + if (tmp.entries_size == 0) { + BUGPRINT("Entries_size never zero\n"); + return -EINVAL; + } + countersize = COUNTER_OFFSET(tmp.nentries) * num_possible_cpus(); + newinfo = (struct ebt_table_info *) + vmalloc(sizeof(struct ebt_table_info) + countersize); + if (!newinfo) + return -ENOMEM; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + newinfo->entries = (char *)vmalloc(tmp.entries_size); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + BUGPRINT("Couldn't copy entries from userspace\n"); + ret = -EFAULT; + goto free_entries; + } + + /* the user wants counters back + the check on the size is done later, when we have the lock */ + if (tmp.num_counters) { + counterstmp = (struct ebt_counter *) + vmalloc(tmp.num_counters * sizeof(struct ebt_counter)); + if (!counterstmp) { + ret = -ENOMEM; + goto free_entries; + } + } + else + counterstmp = NULL; + + /* this can get initialized by translate_table() */ + newinfo->chainstack = NULL; + ret = translate_table(&tmp, newinfo); + + if (ret != 0) + goto free_counterstmp; + + t = find_table_lock(tmp.name, &ret, &ebt_mutex); + if (!t) { + ret = -ENOENT; + goto free_iterate; + } + + /* the table doesn't like it */ + if (t->check && (ret = t->check(newinfo, tmp.valid_hooks))) + goto free_unlock; + + if (tmp.num_counters && tmp.num_counters != t->private->nentries) { + BUGPRINT("Wrong nr. 
of counters requested\n"); + ret = -EINVAL; + goto free_unlock; + } + + /* we have the mutex lock, so no danger in reading this pointer */ + table = t->private; + /* make sure the table can only be rmmod'ed if it contains no rules */ + if (!table->nentries && newinfo->nentries && !try_module_get(t->me)) { + ret = -ENOENT; + goto free_unlock; + } else if (table->nentries && !newinfo->nentries) + module_put(t->me); + /* we need an atomic snapshot of the counters */ + write_lock_bh(&t->lock); + if (tmp.num_counters) + get_counters(t->private->counters, counterstmp, + t->private->nentries); + + t->private = newinfo; + write_unlock_bh(&t->lock); + up(&ebt_mutex); + /* so, a user can change the chains while having messed up her counter + allocation. Only reason why this is done is because this way the lock + is held only once, while this doesn't bring the kernel into a + dangerous state. */ + if (tmp.num_counters && + copy_to_user(tmp.counters, counterstmp, + tmp.num_counters * sizeof(struct ebt_counter))) { + BUGPRINT("Couldn't copy counters to userspace\n"); + ret = -EFAULT; + } + else + ret = 0; + + /* decrease module count and free resources */ + EBT_ENTRY_ITERATE(table->entries, table->entries_size, + ebt_cleanup_entry, NULL); + + vfree(table->entries); + if (table->chainstack) { + for (i = 0; i < num_possible_cpus(); i++) + vfree(table->chainstack[i]); + vfree(table->chainstack); + } + vfree(table); + + if (counterstmp) + vfree(counterstmp); + return ret; + +free_unlock: + up(&ebt_mutex); +free_iterate: + EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, + ebt_cleanup_entry, NULL); +free_counterstmp: + if (counterstmp) + vfree(counterstmp); + /* can be initialized in translate_table() */ + if (newinfo->chainstack) { + for (i = 0; i < num_possible_cpus(); i++) + vfree(newinfo->chainstack[i]); + vfree(newinfo->chainstack); + } +free_entries: + if (newinfo->entries) + vfree(newinfo->entries); +free_newinfo: + if (newinfo) + vfree(newinfo); + return ret; +} + 
+int ebt_register_target(struct ebt_target *target) +{ + int ret; + + ret = down_interruptible(&ebt_mutex); + if (ret != 0) + return ret; + if (!list_named_insert(&ebt_targets, target)) { + up(&ebt_mutex); + return -EEXIST; + } + up(&ebt_mutex); + + return 0; +} + +void ebt_unregister_target(struct ebt_target *target) +{ + down(&ebt_mutex); + LIST_DELETE(&ebt_targets, target); + up(&ebt_mutex); +} + +int ebt_register_match(struct ebt_match *match) +{ + int ret; + + ret = down_interruptible(&ebt_mutex); + if (ret != 0) + return ret; + if (!list_named_insert(&ebt_matches, match)) { + up(&ebt_mutex); + return -EEXIST; + } + up(&ebt_mutex); + + return 0; +} + +void ebt_unregister_match(struct ebt_match *match) +{ + down(&ebt_mutex); + LIST_DELETE(&ebt_matches, match); + up(&ebt_mutex); +} + +int ebt_register_watcher(struct ebt_watcher *watcher) +{ + int ret; + + ret = down_interruptible(&ebt_mutex); + if (ret != 0) + return ret; + if (!list_named_insert(&ebt_watchers, watcher)) { + up(&ebt_mutex); + return -EEXIST; + } + up(&ebt_mutex); + + return 0; +} + +void ebt_unregister_watcher(struct ebt_watcher *watcher) +{ + down(&ebt_mutex); + LIST_DELETE(&ebt_watchers, watcher); + up(&ebt_mutex); +} + +int ebt_register_table(struct ebt_table *table) +{ + struct ebt_table_info *newinfo; + int ret, i, countersize; + + if (!table || !table->table ||!table->table->entries || + table->table->entries_size == 0 || + table->table->counters || table->private) { + BUGPRINT("Bad table data for ebt_register_table!!!\n"); + return -EINVAL; + } + + countersize = COUNTER_OFFSET(table->table->nentries) * num_possible_cpus(); + newinfo = (struct ebt_table_info *) + vmalloc(sizeof(struct ebt_table_info) + countersize); + ret = -ENOMEM; + if (!newinfo) + return -ENOMEM; + + newinfo->entries = (char *)vmalloc(table->table->entries_size); + if (!(newinfo->entries)) + goto free_newinfo; + + memcpy(newinfo->entries, table->table->entries, + table->table->entries_size); + + if (countersize) + 
memset(newinfo->counters, 0, countersize); + + /* fill in newinfo and parse the entries */ + newinfo->chainstack = NULL; + ret = translate_table(table->table, newinfo); + if (ret != 0) { + BUGPRINT("Translate_table failed\n"); + goto free_chainstack; + } + + if (table->check && table->check(newinfo, table->valid_hooks)) { + BUGPRINT("The table doesn't like its own initial data, lol\n"); + return -EINVAL; + } + + table->private = newinfo; + rwlock_init(&table->lock); + ret = down_interruptible(&ebt_mutex); + if (ret != 0) + goto free_chainstack; + + if (list_named_find(&ebt_tables, table->name)) { + ret = -EEXIST; + BUGPRINT("Table name already exists\n"); + goto free_unlock; + } + + /* Hold a reference count if the chains aren't empty */ + if (newinfo->nentries && !try_module_get(table->me)) { + ret = -ENOENT; + goto free_unlock; + } + list_prepend(&ebt_tables, table); + up(&ebt_mutex); + return 0; +free_unlock: + up(&ebt_mutex); +free_chainstack: + if (newinfo->chainstack) { + for (i = 0; i < num_possible_cpus(); i++) + vfree(newinfo->chainstack[i]); + vfree(newinfo->chainstack); + } + vfree(newinfo->entries); +free_newinfo: + vfree(newinfo); + return ret; +} + +void ebt_unregister_table(struct ebt_table *table) +{ + int i; + + if (!table) { + BUGPRINT("Request to unregister NULL table!!!\n"); + return; + } + down(&ebt_mutex); + LIST_DELETE(&ebt_tables, table); + up(&ebt_mutex); + if (table->private->entries) + vfree(table->private->entries); + if (table->private->chainstack) { + for (i = 0; i < num_possible_cpus(); i++) + vfree(table->private->chainstack[i]); + vfree(table->private->chainstack); + } + vfree(table->private); +} + +/* userspace just supplied us with counters */ +static int update_counters(void __user *user, unsigned int len) +{ + int i, ret; + struct ebt_counter *tmp; + struct ebt_replace hlp; + struct ebt_table *t; + + if (copy_from_user(&hlp, user, sizeof(hlp))) + return -EFAULT; + + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct 
ebt_counter)) + return -EINVAL; + if (hlp.num_counters == 0) + return -EINVAL; + + if ( !(tmp = (struct ebt_counter *) + vmalloc(hlp.num_counters * sizeof(struct ebt_counter))) ){ + MEMPRINT("Update_counters && nomemory\n"); + return -ENOMEM; + } + + t = find_table_lock(hlp.name, &ret, &ebt_mutex); + if (!t) + goto free_tmp; + + if (hlp.num_counters != t->private->nentries) { + BUGPRINT("Wrong nr of counters\n"); + ret = -EINVAL; + goto unlock_mutex; + } + + if ( copy_from_user(tmp, hlp.counters, + hlp.num_counters * sizeof(struct ebt_counter)) ) { + BUGPRINT("Updata_counters && !cfu\n"); + ret = -EFAULT; + goto unlock_mutex; + } + + /* we want an atomic add of the counters */ + write_lock_bh(&t->lock); + + /* we add to the counters of the first cpu */ + for (i = 0; i < hlp.num_counters; i++) { + t->private->counters[i].pcnt += tmp[i].pcnt; + t->private->counters[i].bcnt += tmp[i].bcnt; + } + + write_unlock_bh(&t->lock); + ret = 0; +unlock_mutex: + up(&ebt_mutex); +free_tmp: + vfree(tmp); + return ret; +} + +static inline int ebt_make_matchname(struct ebt_entry_match *m, + char *base, char *ubase) +{ + char *hlp = ubase - base + (char *)m; + if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN)) + return -EFAULT; + return 0; +} + +static inline int ebt_make_watchername(struct ebt_entry_watcher *w, + char *base, char *ubase) +{ + char *hlp = ubase - base + (char *)w; + if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN)) + return -EFAULT; + return 0; +} + +static inline int ebt_make_names(struct ebt_entry *e, char *base, char *ubase) +{ + int ret; + char *hlp; + struct ebt_entry_target *t; + + if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + return 0; + + hlp = ubase - base + (char *)e + e->target_offset; + t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + + ret = EBT_MATCH_ITERATE(e, ebt_make_matchname, base, ubase); + if (ret != 0) + return ret; + ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); + if (ret 
!= 0) + return ret; + if (copy_to_user(hlp, t->u.target->name, EBT_FUNCTION_MAXNAMELEN)) + return -EFAULT; + return 0; +} + +/* called with ebt_mutex down */ +static int copy_everything_to_user(struct ebt_table *t, void __user *user, + int *len, int cmd) +{ + struct ebt_replace tmp; + struct ebt_counter *counterstmp, *oldcounters; + unsigned int entries_size, nentries; + char *entries; + + if (cmd == EBT_SO_GET_ENTRIES) { + entries_size = t->private->entries_size; + nentries = t->private->nentries; + entries = t->private->entries; + oldcounters = t->private->counters; + } else { + entries_size = t->table->entries_size; + nentries = t->table->nentries; + entries = t->table->entries; + oldcounters = t->table->counters; + } + + if (copy_from_user(&tmp, user, sizeof(tmp))) { + BUGPRINT("Cfu didn't work\n"); + return -EFAULT; + } + + if (*len != sizeof(struct ebt_replace) + entries_size + + (tmp.num_counters? nentries * sizeof(struct ebt_counter): 0)) { + BUGPRINT("Wrong size\n"); + return -EINVAL; + } + + if (tmp.nentries != nentries) { + BUGPRINT("Nentries wrong\n"); + return -EINVAL; + } + + if (tmp.entries_size != entries_size) { + BUGPRINT("Wrong size\n"); + return -EINVAL; + } + + /* userspace might not need the counters */ + if (tmp.num_counters) { + if (tmp.num_counters != nentries) { + BUGPRINT("Num_counters wrong\n"); + return -EINVAL; + } + counterstmp = (struct ebt_counter *) + vmalloc(nentries * sizeof(struct ebt_counter)); + if (!counterstmp) { + MEMPRINT("Couldn't copy counters, out of memory\n"); + return -ENOMEM; + } + write_lock_bh(&t->lock); + get_counters(oldcounters, counterstmp, nentries); + write_unlock_bh(&t->lock); + + if (copy_to_user(tmp.counters, counterstmp, + nentries * sizeof(struct ebt_counter))) { + BUGPRINT("Couldn't copy counters to userspace\n"); + vfree(counterstmp); + return -EFAULT; + } + vfree(counterstmp); + } + + if (copy_to_user(tmp.entries, entries, entries_size)) { + BUGPRINT("Couldn't copy entries to userspace\n"); + return 
-EFAULT; + } + /* set the match/watcher/target names right */ + return EBT_ENTRY_ITERATE(entries, entries_size, + ebt_make_names, entries, tmp.entries); +} + +static int do_ebt_set_ctl(struct sock *sk, + int cmd, void __user *user, unsigned int len) +{ + int ret; + + switch(cmd) { + case EBT_SO_SET_ENTRIES: + ret = do_replace(user, len); + break; + case EBT_SO_SET_COUNTERS: + ret = update_counters(user, len); + break; + default: + ret = -EINVAL; + } + return ret; +} + +static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + struct ebt_replace tmp; + struct ebt_table *t; + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + t = find_table_lock(tmp.name, &ret, &ebt_mutex); + if (!t) + return ret; + + switch(cmd) { + case EBT_SO_GET_INFO: + case EBT_SO_GET_INIT_INFO: + if (*len != sizeof(struct ebt_replace)){ + ret = -EINVAL; + up(&ebt_mutex); + break; + } + if (cmd == EBT_SO_GET_INFO) { + tmp.nentries = t->private->nentries; + tmp.entries_size = t->private->entries_size; + tmp.valid_hooks = t->valid_hooks; + } else { + tmp.nentries = t->table->nentries; + tmp.entries_size = t->table->entries_size; + tmp.valid_hooks = t->table->valid_hooks; + } + up(&ebt_mutex); + if (copy_to_user(user, &tmp, *len) != 0){ + BUGPRINT("c2u Didn't work\n"); + ret = -EFAULT; + break; + } + ret = 0; + break; + + case EBT_SO_GET_ENTRIES: + case EBT_SO_GET_INIT_ENTRIES: + ret = copy_everything_to_user(t, user, len, cmd); + up(&ebt_mutex); + break; + + default: + up(&ebt_mutex); + ret = -EINVAL; + } + + return ret; +} + +static struct nf_sockopt_ops ebt_sockopts = +{ { NULL, NULL }, PF_INET, EBT_BASE_CTL, EBT_SO_SET_MAX + 1, do_ebt_set_ctl, + EBT_BASE_CTL, EBT_SO_GET_MAX + 1, do_ebt_get_ctl, 0, NULL +}; + +static int __init init(void) +{ + int ret; + + down(&ebt_mutex); + list_named_insert(&ebt_targets, &ebt_standard_target); + up(&ebt_mutex); + if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) + return ret; + + printk(KERN_NOTICE 
"Ebtables v2.0 registered\n"); + return 0; +} + +static void __exit fini(void) +{ + nf_unregister_sockopt(&ebt_sockopts); + printk(KERN_NOTICE "Ebtables v2.0 unregistered\n"); +} + +EXPORT_SYMBOL(ebt_register_table); +EXPORT_SYMBOL(ebt_unregister_table); +EXPORT_SYMBOL(ebt_register_match); +EXPORT_SYMBOL(ebt_unregister_match); +EXPORT_SYMBOL(ebt_register_watcher); +EXPORT_SYMBOL(ebt_unregister_watcher); +EXPORT_SYMBOL(ebt_register_target); +EXPORT_SYMBOL(ebt_unregister_target); +EXPORT_SYMBOL(ebt_do_table); +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); |