diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/ipv4/multipath_wrandom.c | |
download | lwn-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz lwn-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'net/ipv4/multipath_wrandom.c')
-rw-r--r-- | net/ipv4/multipath_wrandom.c | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c new file mode 100644 index 000000000000..10b23e1bece6 --- /dev/null +++ b/net/ipv4/multipath_wrandom.c @@ -0,0 +1,344 @@ +/* + * Weighted random policy for multipath. + * + * + * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ + * + * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <net/ip.h> +#include <net/protocol.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/icmp.h> +#include <net/udp.h> +#include <net/raw.h> +#include <linux/notifier.h> +#include <linux/if_arp.h> +#include <linux/netfilter_ipv4.h> +#include <net/ipip.h> +#include <net/checksum.h> +#include <net/ip_fib.h> +#include <net/ip_mp_alg.h> + +#define MULTIPATH_STATE_SIZE 15 + +struct multipath_candidate { + struct multipath_candidate *next; + int power; + struct rtable *rt; +}; + +struct multipath_dest { + struct list_head list; + + const struct fib_nh *nh_info; + __u32 netmask; + __u32 network; + unsigned char prefixlen; + + struct rcu_head rcu; +}; + +struct multipath_bucket { + struct list_head head; + spinlock_t lock; +}; + +struct multipath_route { + struct list_head list; + + int oif; + __u32 gw; + struct list_head dests; + + struct rcu_head rcu; +}; + +/* state: primarily weight per route information */ +static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; + +/* interface to random number generation */ +static unsigned int RANDOM_SEED = 93186752; + +static inline unsigned int random(unsigned int ubound) +{ + static unsigned int a = 1588635695, + q = 2, + r = 1117695901; + RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q); + return RANDOM_SEED % ubound; +} + +static unsigned char __multipath_lookup_weight(const struct flowi *fl, + const struct rtable *rt) +{ + const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; + struct multipath_route *r; + struct multipath_route *target_route = NULL; + struct multipath_dest *d; + int weight = 1; + + /* lookup the weight information for a certain route */ + rcu_read_lock(); + + /* find state entry for gateway or add one if necessary */ + list_for_each_entry_rcu(r, &state[state_idx].head, list) { + if (r->gw == rt->rt_gateway && + r->oif == rt->idev->dev->ifindex) { + target_route = r; + break; + } + } + + if (!target_route) { + /* this should not happen... but we are prepared */ + printk( KERN_CRIT"%s: missing state for gateway: %u and " \ + "device %d\n", __FUNCTION__, rt->rt_gateway, + rt->idev->dev->ifindex); + goto out; + } + + /* find state entry for destination */ + list_for_each_entry_rcu(d, &target_route->dests, list) { + __u32 targetnetwork = fl->fl4_dst & + (0xFFFFFFFF >> (32 - d->prefixlen)); + + if ((targetnetwork & d->netmask) == d->network) { + weight = d->nh_info->nh_weight; + goto out; + } + } + +out: + rcu_read_unlock(); + return weight; +} + +static void wrandom_init_state(void) +{ + int i; + + for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { + INIT_LIST_HEAD(&state[i].head); + spin_lock_init(&state[i].lock); + } +} + +static void wrandom_select_route(const struct flowi *flp, + struct rtable *first, + struct rtable **rp) +{ + struct rtable *rt; + struct rtable *decision; + struct multipath_candidate *first_mpc = NULL; + struct multipath_candidate *mpc, *last_mpc = NULL; + int power = 0; + int last_power; + int selector; + const size_t size_mpc = sizeof(struct multipath_candidate); + + /* collect all candidates and identify their weights */ + for (rt = rcu_dereference(first); rt; + rt = rcu_dereference(rt->u.rt_next)) { + if ((rt->u.dst.flags & DST_BALANCED) != 0 && + multipath_comparekeys(&rt->fl, flp)) { + struct multipath_candidate* mpc = + (struct multipath_candidate*) + kmalloc(size_mpc, GFP_KERNEL); + + if (!mpc) + return; + + power += __multipath_lookup_weight(flp, rt) * 10000; + + mpc->power = power; + mpc->rt = rt; + mpc->next = NULL; + + if (!first_mpc) + first_mpc = mpc; + else + last_mpc->next = mpc; + + last_mpc = mpc; + } + } + + /* choose a weighted random candidate */ + decision = first; + selector = random(power); + last_power = 0; + + /* select candidate, adjust GC data and cleanup local state */ + decision = first; + last_mpc = NULL; + for (mpc = first_mpc; mpc; mpc = mpc->next) { + mpc->rt->u.dst.lastuse = jiffies; + if (last_power <= selector && selector < mpc->power) + decision = mpc->rt; + + last_power = mpc->power; + if (last_mpc) + kfree(last_mpc); + + last_mpc = mpc; + } + + if (last_mpc) { + /* concurrent __multipath_flush may lead to !last_mpc */ + kfree(last_mpc); + } + + decision->u.dst.__use++; + *rp = decision; +} + +static void wrandom_set_nhinfo(__u32 network, + __u32 netmask, + unsigned char prefixlen, + const struct fib_nh *nh) +{ + const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; + struct multipath_route *r, *target_route = NULL; + struct multipath_dest *d, *target_dest = NULL; + + /* store the weight information for a certain route */ + spin_lock(&state[state_idx].lock); + + /* find state entry for gateway or add one if necessary */ + list_for_each_entry_rcu(r, &state[state_idx].head, list) { + if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { + target_route = r; + break; + } + } + + if (!target_route) { + const size_t size_rt = sizeof(struct multipath_route); + target_route = (struct multipath_route *) + kmalloc(size_rt, GFP_KERNEL); + + target_route->gw = nh->nh_gw; + target_route->oif = nh->nh_oif; + memset(&target_route->rcu, 0, sizeof(struct rcu_head)); + INIT_LIST_HEAD(&target_route->dests); + + list_add_rcu(&target_route->list, &state[state_idx].head); + } + + /* find state entry for destination or add one if necessary */ + list_for_each_entry_rcu(d, &target_route->dests, list) { + if (d->nh_info == nh) { + target_dest = d; + break; + } + } + + if (!target_dest) { + const size_t size_dst = sizeof(struct multipath_dest); + target_dest = (struct multipath_dest*) + kmalloc(size_dst, GFP_KERNEL); + + target_dest->nh_info = nh; + target_dest->network = network; + target_dest->netmask = netmask; + target_dest->prefixlen = prefixlen; + memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); + + list_add_rcu(&target_dest->list, &target_route->dests); + } + /* else: we already stored this info for another destination => + * we are finished + */ + + spin_unlock(&state[state_idx].lock); +} + +static void __multipath_free(struct rcu_head *head) +{ + struct multipath_route *rt = container_of(head, struct multipath_route, + rcu); + kfree(rt); +} + +static void __multipath_free_dst(struct rcu_head *head) +{ + struct multipath_dest *dst = container_of(head, + struct multipath_dest, + rcu); + kfree(dst); +} + +static void wrandom_flush(void) +{ + int i; + + /* defere delete to all entries */ + for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { + struct multipath_route *r; + + spin_lock(&state[i].lock); + list_for_each_entry_rcu(r, &state[i].head, list) { + struct multipath_dest *d; + list_for_each_entry_rcu(d, &r->dests, list) { + list_del_rcu(&d->list); + call_rcu(&d->rcu, + __multipath_free_dst); + } + list_del_rcu(&r->list); + call_rcu(&r->rcu, + __multipath_free); + } + + spin_unlock(&state[i].lock); + } +} + +static struct ip_mp_alg_ops wrandom_ops = { + .mp_alg_select_route = wrandom_select_route, + .mp_alg_flush = wrandom_flush, + .mp_alg_set_nhinfo = wrandom_set_nhinfo, +}; + +static int __init wrandom_init(void) +{ + wrandom_init_state(); + + return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); +} + +static void __exit wrandom_exit(void) +{ + multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); +} + +module_init(wrandom_init); +module_exit(wrandom_exit); |