With help from Chris Wedgwood.
Signed-off-by: David S. Miller <davem@davemloft.net>
---------------------------
-What: Multipath cached routing support in ipv4
-When: in 2.6.23
-Why: Code was merged, then submitter immediately disappeared leaving
- us with no maintainer and lots of bugs. The code should not have
- been merged in the first place, and many aspects of it's
- implementation are blocking more critical core networking
- development. It's marked EXPERIMENTAL and no distribution
- enables it because it cause obscure crashes due to unfixable bugs
- (interfaces don't return errors so memory allocation can't be
- handled, calling contexts of these interfaces make handling
- errors impossible too because they get called after we've
- totally commited to creating a route object, for example).
- This problem has existed for years and no forward progress
- has ever been made, and nobody steps up to try and salvage
- this code, so we're going to finally just get rid of it.
-Who: David S. Miller <davem@davemloft.net>
-
----------------------------
-
What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
When: December 2007
Why: These functions are a leftover from 2.4 times. They have several
header-y += in_route.h
header-y += ioctl.h
header-y += ipmi_msgdefs.h
-header-y += ip_mp_alg.h
header-y += ipsec.h
header-y += ipx.h
header-y += irda.h
+++ /dev/null
-/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values.
- *
- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
- */
-
-#ifndef _LINUX_IP_MP_ALG_H
-#define _LINUX_IP_MP_ALG_H
-
-enum ip_mp_alg {
- IP_MP_ALG_NONE,
- IP_MP_ALG_RR,
- IP_MP_ALG_DRR,
- IP_MP_ALG_RANDOM,
- IP_MP_ALG_WRANDOM,
- __IP_MP_ALG_MAX
-};
-
-#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1)
-
-#endif /* _LINUX_IP_MP_ALG_H */
-
RTA_FLOW,
RTA_CACHEINFO,
RTA_SESSION,
- RTA_MP_ALGO,
+ RTA_MP_ALGO, /* no longer used; kept so RTA_TABLE keeps its value */
RTA_TABLE,
__RTA_MAX
};
#define DST_NOXFRM 2
#define DST_NOPOLICY 4
#define DST_NOHASH 8
-#define DST_BALANCED 0x10
unsigned long expires;
unsigned short header_len; /* more space at head required */
int fc_mx_len;
int fc_mp_len;
u32 fc_flow;
- u32 fc_mp_alg;
u32 fc_nlflags;
struct nl_info fc_nlinfo;
};
int fib_nhs;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_power;
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- u32 fib_mp_alg;
#endif
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].nh_dev
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- __be32 network;
- __be32 netmask;
-#endif
struct fib_info *fi;
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rule *r;
#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev)
#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif)
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-#define FIB_RES_NETWORK(res) ((res).network)
-#define FIB_RES_NETMASK(res) ((res).netmask)
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-#define FIB_RES_NETWORK(res) (0)
-#define FIB_RES_NETMASK(res) (0)
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
-
struct fib_table {
struct hlist_node tb_hlist;
u32 tb_id;
+++ /dev/null
-/* ip_mp_alg.h: IPV4 multipath algorithm support.
- *
- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
- */
-
-#ifndef _NET_IP_MP_ALG_H
-#define _NET_IP_MP_ALG_H
-
-#include <linux/ip_mp_alg.h>
-#include <net/flow.h>
-#include <net/route.h>
-
-struct fib_nh;
-
-struct ip_mp_alg_ops {
- void (*mp_alg_select_route)(const struct flowi *flp,
- struct rtable *rth, struct rtable **rp);
- void (*mp_alg_flush)(void);
- void (*mp_alg_set_nhinfo)(__be32 network, __be32 netmask,
- unsigned char prefixlen,
- const struct fib_nh *nh);
- void (*mp_alg_remove)(struct rtable *rth);
-};
-
-extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg);
-extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg);
-
-extern struct ip_mp_alg_ops *ip_mp_alg_table[];
-
-static inline int multipath_select_route(const struct flowi *flp,
- struct rtable *rth,
- struct rtable **rp)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
-
- /* mp_alg_select_route _MUST_ be implemented */
- if (ops && (rth->u.dst.flags & DST_BALANCED)) {
- ops->mp_alg_select_route(flp, rth, rp);
- return 1;
- }
-#endif
- return 0;
-}
-
-static inline void multipath_flush(void)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- int i;
-
- for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) {
- struct ip_mp_alg_ops *ops = ip_mp_alg_table[i];
-
- if (ops && ops->mp_alg_flush)
- ops->mp_alg_flush();
- }
-#endif
-}
-
-static inline void multipath_set_nhinfo(struct rtable *rth,
- __be32 network, __be32 netmask,
- unsigned char prefixlen,
- const struct fib_nh *nh)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
-
- if (ops && ops->mp_alg_set_nhinfo)
- ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh);
-#endif
-}
-
-static inline void multipath_remove(struct rtable *rth)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
-
- if (ops && ops->mp_alg_remove &&
- (rth->u.dst.flags & DST_BALANCED))
- ops->mp_alg_remove(rth);
-#endif
-}
-
-static inline int multipath_comparekeys(const struct flowi *flp1,
- const struct flowi *flp2)
-{
- return flp1->fl4_dst == flp2->fl4_dst &&
- flp1->fl4_src == flp2->fl4_src &&
- flp1->oif == flp2->oif &&
- flp1->mark == flp2->mark &&
- !((flp1->fl4_tos ^ flp2->fl4_tos) &
- (IPTOS_RT_MASK | RTO_ONLINK));
-}
-
-#endif /* _NET_IP_MP_ALG_H */
unsigned rt_flags;
__u16 rt_type;
- __u16 rt_multipath_alg;
__be32 rt_dst; /* Path destination */
__be32 rt_src; /* Path source */
equal "cost" and chooses one of them in a non-deterministic fashion
if a matching packet arrives.
-config IP_ROUTE_MULTIPATH_CACHED
- bool "IP: equal cost multipath with caching support (EXPERIMENTAL)"
- depends on IP_ROUTE_MULTIPATH
- help
- Normally, equal cost multipath routing is not supported by the
- routing cache. If you say Y here, alternative routes are cached
- and on cache lookup a route is chosen in a configurable fashion.
-
- If unsure, say N.
-
-config IP_ROUTE_MULTIPATH_RR
- tristate "MULTIPATH: round robin algorithm"
- depends on IP_ROUTE_MULTIPATH_CACHED
- help
- Multipath routes are chosen according to Round Robin
-
-config IP_ROUTE_MULTIPATH_RANDOM
- tristate "MULTIPATH: random algorithm"
- depends on IP_ROUTE_MULTIPATH_CACHED
- help
- Multipath routes are chosen in a random fashion. Actually,
- there is no weight for a route. The advantage of this policy
- is that it is implemented stateless and therefore introduces only
- a very small delay.
-
-config IP_ROUTE_MULTIPATH_WRANDOM
- tristate "MULTIPATH: weighted random algorithm"
- depends on IP_ROUTE_MULTIPATH_CACHED
- help
- Multipath routes are chosen in a weighted random fashion.
- The per route weights are the weights visible via ip route 2. As the
- corresponding state management introduces some overhead routing delay
- is increased.
-
-config IP_ROUTE_MULTIPATH_DRR
- tristate "MULTIPATH: interface round robin algorithm"
- depends on IP_ROUTE_MULTIPATH_CACHED
- help
- Connections are distributed in a round robin fashion over the
- available interfaces. This policy makes sense if the connections
- should be primarily distributed on interfaces and not on routes.
-
config IP_ROUTE_VERBOSE
bool "IP: verbose route monitoring"
depends on IP_ADVANCED_ROUTER
obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
obj-$(CONFIG_IP_PNP) += ipconfig.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
obj-$(CONFIG_IP_VS) += ipvs/
obj-$(CONFIG_INET_DIAG) += inet_diag.o
-obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_PROTOINFO] = { .type = NLA_U32 },
[RTA_FLOW] = { .type = NLA_U32 },
- [RTA_MP_ALGO] = { .type = NLA_U32 },
};
static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
case RTA_FLOW:
cfg->fc_flow = nla_get_u32(attr);
break;
- case RTA_MP_ALGO:
- cfg->fc_mp_alg = nla_get_u32(attr);
- break;
case RTA_TABLE:
cfg->fc_table = nla_get_u32(attr);
break;
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
-#include <net/ip_mp_alg.h>
#include <net/netlink.h>
#include <net/nexthop.h>
goto err_inval;
}
#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (cfg->fc_mp_alg) {
- if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
- cfg->fc_mp_alg > IP_MP_ALG_MAX)
- goto err_inval;
- }
-#endif
err = -ENOBUFS;
if (fib_info_cnt >= fib_hash_size) {
#endif
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- fi->fib_mp_alg = cfg->fc_mp_alg;
-#endif
-
if (fib_props[cfg->fc_type].error) {
if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
goto err_inval;
res->type = fa->fa_type;
res->scope = fa->fa_scope;
res->fi = fa->fa_info;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- res->netmask = mask;
- res->network = zone & inet_make_mask(prefixlen);
-#endif
atomic_inc(&res->fi->fib_clntref);
return 0;
}
+++ /dev/null
-/* multipath.c: IPV4 multipath algorithm support.
- *
- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-
-#include <net/ip_mp_alg.h>
-
-static DEFINE_SPINLOCK(alg_table_lock);
-struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1];
-
-int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
-{
- struct ip_mp_alg_ops **slot;
- int err;
-
- if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX ||
- !ops->mp_alg_select_route)
- return -EINVAL;
-
- spin_lock(&alg_table_lock);
- slot = &ip_mp_alg_table[n];
- if (*slot != NULL) {
- err = -EBUSY;
- } else {
- *slot = ops;
- err = 0;
- }
- spin_unlock(&alg_table_lock);
-
- return err;
-}
-EXPORT_SYMBOL(multipath_alg_register);
-
-void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
-{
- struct ip_mp_alg_ops **slot;
-
- if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
- return;
-
- spin_lock(&alg_table_lock);
- slot = &ip_mp_alg_table[n];
- if (*slot == ops)
- *slot = NULL;
- spin_unlock(&alg_table_lock);
-
- synchronize_net();
-}
-EXPORT_SYMBOL(multipath_alg_unregister);
+++ /dev/null
-/*
- * Device round robin policy for multipath.
- *
- *
- * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
- *
- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/raw.h>
-#include <linux/notifier.h>
-#include <linux/if_arp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/ipip.h>
-#include <net/checksum.h>
-#include <net/ip_mp_alg.h>
-
-struct multipath_device {
- int ifi; /* interface index of device */
- atomic_t usecount;
- int allocated;
-};
-
-#define MULTIPATH_MAX_DEVICECANDIDATES 10
-
-static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
-static DEFINE_SPINLOCK(state_lock);
-
-static int inline __multipath_findslot(void)
-{
- int i;
-
- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
- if (state[i].allocated == 0)
- return i;
- }
- return -1;
-}
-
-static int inline __multipath_finddev(int ifindex)
-{
- int i;
-
- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
- if (state[i].allocated != 0 &&
- state[i].ifi == ifindex)
- return i;
- }
- return -1;
-}
-
-static int drr_dev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = ptr;
- int devidx;
-
- switch (event) {
- case NETDEV_UNREGISTER:
- case NETDEV_DOWN:
- spin_lock_bh(&state_lock);
-
- devidx = __multipath_finddev(dev->ifindex);
- if (devidx != -1) {
- state[devidx].allocated = 0;
- state[devidx].ifi = 0;
- atomic_set(&state[devidx].usecount, 0);
- }
-
- spin_unlock_bh(&state_lock);
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block drr_dev_notifier = {
- .notifier_call = drr_dev_event,
-};
-
-
-static void drr_safe_inc(atomic_t *usecount)
-{
- int n;
-
- atomic_inc(usecount);
-
- n = atomic_read(usecount);
- if (n <= 0) {
- int i;
-
- spin_lock_bh(&state_lock);
-
- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
- atomic_set(&state[i].usecount, 0);
-
- spin_unlock_bh(&state_lock);
- }
-}
-
-static void drr_select_route(const struct flowi *flp,
- struct rtable *first, struct rtable **rp)
-{
- struct rtable *nh, *result, *cur_min;
- int min_usecount = -1;
- int devidx = -1;
- int cur_min_devidx = -1;
-
- /* 1. make sure all alt. nexthops have the same GC related data */
- /* 2. determine the new candidate to be returned */
- result = NULL;
- cur_min = NULL;
- for (nh = rcu_dereference(first); nh;
- nh = rcu_dereference(nh->u.dst.rt_next)) {
- if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
- multipath_comparekeys(&nh->fl, flp)) {
- int nh_ifidx = nh->u.dst.dev->ifindex;
-
- nh->u.dst.lastuse = jiffies;
- nh->u.dst.__use++;
- if (result != NULL)
- continue;
-
- /* search for the output interface */
-
- /* this is not SMP safe, only add/remove are
- * SMP safe as wrong usecount updates have no big
- * impact
- */
- devidx = __multipath_finddev(nh_ifidx);
- if (devidx == -1) {
- /* add the interface to the array
- * SMP safe
- */
- spin_lock_bh(&state_lock);
-
- /* due to SMP: search again */
- devidx = __multipath_finddev(nh_ifidx);
- if (devidx == -1) {
- /* add entry for device */
- devidx = __multipath_findslot();
- if (devidx == -1) {
- /* unlikely but possible */
- continue;
- }
-
- state[devidx].allocated = 1;
- state[devidx].ifi = nh_ifidx;
- atomic_set(&state[devidx].usecount, 0);
- min_usecount = 0;
- }
-
- spin_unlock_bh(&state_lock);
- }
-
- if (min_usecount == 0) {
- /* if the device has not been used it is
- * the primary target
- */
- drr_safe_inc(&state[devidx].usecount);
- result = nh;
- } else {
- int count =
- atomic_read(&state[devidx].usecount);
-
- if (min_usecount == -1 ||
- count < min_usecount) {
- cur_min = nh;
- cur_min_devidx = devidx;
- min_usecount = count;
- }
- }
- }
- }
-
- if (!result) {
- if (cur_min) {
- drr_safe_inc(&state[cur_min_devidx].usecount);
- result = cur_min;
- } else {
- result = first;
- }
- }
-
- *rp = result;
-}
-
-static struct ip_mp_alg_ops drr_ops = {
- .mp_alg_select_route = drr_select_route,
-};
-
-static int __init drr_init(void)
-{
- int err = register_netdevice_notifier(&drr_dev_notifier);
-
- if (err)
- return err;
-
- err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
- if (err)
- goto fail;
-
- return 0;
-
-fail:
- unregister_netdevice_notifier(&drr_dev_notifier);
- return err;
-}
-
-static void __exit drr_exit(void)
-{
- unregister_netdevice_notifier(&drr_dev_notifier);
- multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
-}
-
-module_init(drr_init);
-module_exit(drr_exit);
-MODULE_LICENSE("GPL");
+++ /dev/null
-/*
- * Random policy for multipath.
- *
- *
- * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $
- *
- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/raw.h>
-#include <linux/notifier.h>
-#include <linux/if_arp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/ipip.h>
-#include <net/checksum.h>
-#include <net/ip_mp_alg.h>
-
-#define MULTIPATH_MAX_CANDIDATES 40
-
-static void random_select_route(const struct flowi *flp,
- struct rtable *first,
- struct rtable **rp)
-{
- struct rtable *rt;
- struct rtable *decision;
- unsigned char candidate_count = 0;
-
- /* count all candidate */
- for (rt = rcu_dereference(first); rt;
- rt = rcu_dereference(rt->u.dst.rt_next)) {
- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
- multipath_comparekeys(&rt->fl, flp))
- ++candidate_count;
- }
-
- /* choose a random candidate */
- decision = first;
- if (candidate_count > 1) {
- unsigned char i = 0;
- unsigned char candidate_no = (unsigned char)
- (random32() % candidate_count);
-
- /* find chosen candidate and adjust GC data for all candidates
- * to ensure they stay in cache
- */
- for (rt = first; rt; rt = rt->u.dst.rt_next) {
- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
- multipath_comparekeys(&rt->fl, flp)) {
- rt->u.dst.lastuse = jiffies;
-
- if (i == candidate_no)
- decision = rt;
-
- if (i >= candidate_count)
- break;
-
- i++;
- }
- }
- }
-
- decision->u.dst.__use++;
- *rp = decision;
-}
-
-static struct ip_mp_alg_ops random_ops = {
- .mp_alg_select_route = random_select_route,
-};
-
-static int __init random_init(void)
-{
- return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM);
-}
-
-static void __exit random_exit(void)
-{
- multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM);
-}
-
-module_init(random_init);
-module_exit(random_exit);
-MODULE_LICENSE("GPL");
+++ /dev/null
-/*
- * Round robin policy for multipath.
- *
- *
- * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $
- *
- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/raw.h>
-#include <linux/notifier.h>
-#include <linux/if_arp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/ipip.h>
-#include <net/checksum.h>
-#include <net/ip_mp_alg.h>
-
-static void rr_select_route(const struct flowi *flp,
- struct rtable *first, struct rtable **rp)
-{
- struct rtable *nh, *result, *min_use_cand = NULL;
- int min_use = -1;
-
- /* 1. make sure all alt. nexthops have the same GC related data
- * 2. determine the new candidate to be returned
- */
- result = NULL;
- for (nh = rcu_dereference(first); nh;
- nh = rcu_dereference(nh->u.dst.rt_next)) {
- if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
- multipath_comparekeys(&nh->fl, flp)) {
- nh->u.dst.lastuse = jiffies;
-
- if (min_use == -1 || nh->u.dst.__use < min_use) {
- min_use = nh->u.dst.__use;
- min_use_cand = nh;
- }
- }
- }
- result = min_use_cand;
- if (!result)
- result = first;
-
- result->u.dst.__use++;
- *rp = result;
-}
-
-static struct ip_mp_alg_ops rr_ops = {
- .mp_alg_select_route = rr_select_route,
-};
-
-static int __init rr_init(void)
-{
- return multipath_alg_register(&rr_ops, IP_MP_ALG_RR);
-}
-
-static void __exit rr_exit(void)
-{
- multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR);
-}
-
-module_init(rr_init);
-module_exit(rr_exit);
-MODULE_LICENSE("GPL");
+++ /dev/null
-/*
- * Weighted random policy for multipath.
- *
- *
- * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
- *
- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/igmp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/mroute.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <net/ip.h>
-#include <net/protocol.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/raw.h>
-#include <linux/notifier.h>
-#include <linux/if_arp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/ipip.h>
-#include <net/checksum.h>
-#include <net/ip_fib.h>
-#include <net/ip_mp_alg.h>
-
-#define MULTIPATH_STATE_SIZE 15
-
-struct multipath_candidate {
- struct multipath_candidate *next;
- int power;
- struct rtable *rt;
-};
-
-struct multipath_dest {
- struct list_head list;
-
- const struct fib_nh *nh_info;
- __be32 netmask;
- __be32 network;
- unsigned char prefixlen;
-
- struct rcu_head rcu;
-};
-
-struct multipath_bucket {
- struct list_head head;
- spinlock_t lock;
-};
-
-struct multipath_route {
- struct list_head list;
-
- int oif;
- __be32 gw;
- struct list_head dests;
-
- struct rcu_head rcu;
-};
-
-/* state: primarily weight per route information */
-static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
-
-static unsigned char __multipath_lookup_weight(const struct flowi *fl,
- const struct rtable *rt)
-{
- const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
- struct multipath_route *r;
- struct multipath_route *target_route = NULL;
- struct multipath_dest *d;
- int weight = 1;
-
- /* lookup the weight information for a certain route */
- rcu_read_lock();
-
- /* find state entry for gateway or add one if necessary */
- list_for_each_entry_rcu(r, &state[state_idx].head, list) {
- if (r->gw == rt->rt_gateway &&
- r->oif == rt->idev->dev->ifindex) {
- target_route = r;
- break;
- }
- }
-
- if (!target_route) {
- /* this should not happen... but we are prepared */
- printk( KERN_CRIT"%s: missing state for gateway: %u and " \
- "device %d\n", __FUNCTION__, rt->rt_gateway,
- rt->idev->dev->ifindex);
- goto out;
- }
-
- /* find state entry for destination */
- list_for_each_entry_rcu(d, &target_route->dests, list) {
- __be32 targetnetwork = fl->fl4_dst &
- inet_make_mask(d->prefixlen);
-
- if ((targetnetwork & d->netmask) == d->network) {
- weight = d->nh_info->nh_weight;
- goto out;
- }
- }
-
-out:
- rcu_read_unlock();
- return weight;
-}
-
-static void wrandom_init_state(void)
-{
- int i;
-
- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
- INIT_LIST_HEAD(&state[i].head);
- spin_lock_init(&state[i].lock);
- }
-}
-
-static void wrandom_select_route(const struct flowi *flp,
- struct rtable *first,
- struct rtable **rp)
-{
- struct rtable *rt;
- struct rtable *decision;
- struct multipath_candidate *first_mpc = NULL;
- struct multipath_candidate *mpc, *last_mpc = NULL;
- int power = 0;
- int last_power;
- int selector;
- const size_t size_mpc = sizeof(struct multipath_candidate);
-
- /* collect all candidates and identify their weights */
- for (rt = rcu_dereference(first); rt;
- rt = rcu_dereference(rt->u.dst.rt_next)) {
- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
- multipath_comparekeys(&rt->fl, flp)) {
- struct multipath_candidate* mpc =
- (struct multipath_candidate*)
- kmalloc(size_mpc, GFP_ATOMIC);
-
- if (!mpc)
- return;
-
- power += __multipath_lookup_weight(flp, rt) * 10000;
-
- mpc->power = power;
- mpc->rt = rt;
- mpc->next = NULL;
-
- if (!first_mpc)
- first_mpc = mpc;
- else
- last_mpc->next = mpc;
-
- last_mpc = mpc;
- }
- }
-
- /* choose a weighted random candidate */
- decision = first;
- selector = random32() % power;
- last_power = 0;
-
- /* select candidate, adjust GC data and cleanup local state */
- decision = first;
- last_mpc = NULL;
- for (mpc = first_mpc; mpc; mpc = mpc->next) {
- mpc->rt->u.dst.lastuse = jiffies;
- if (last_power <= selector && selector < mpc->power)
- decision = mpc->rt;
-
- last_power = mpc->power;
- kfree(last_mpc);
- last_mpc = mpc;
- }
-
- /* concurrent __multipath_flush may lead to !last_mpc */
- kfree(last_mpc);
-
- decision->u.dst.__use++;
- *rp = decision;
-}
-
-static void wrandom_set_nhinfo(__be32 network,
- __be32 netmask,
- unsigned char prefixlen,
- const struct fib_nh *nh)
-{
- const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
- struct multipath_route *r, *target_route = NULL;
- struct multipath_dest *d, *target_dest = NULL;
-
- /* store the weight information for a certain route */
- spin_lock_bh(&state[state_idx].lock);
-
- /* find state entry for gateway or add one if necessary */
- list_for_each_entry_rcu(r, &state[state_idx].head, list) {
- if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
- target_route = r;
- break;
- }
- }
-
- if (!target_route) {
- const size_t size_rt = sizeof(struct multipath_route);
- target_route = (struct multipath_route *)
- kmalloc(size_rt, GFP_ATOMIC);
-
- target_route->gw = nh->nh_gw;
- target_route->oif = nh->nh_oif;
- memset(&target_route->rcu, 0, sizeof(struct rcu_head));
- INIT_LIST_HEAD(&target_route->dests);
-
- list_add_rcu(&target_route->list, &state[state_idx].head);
- }
-
- /* find state entry for destination or add one if necessary */
- list_for_each_entry_rcu(d, &target_route->dests, list) {
- if (d->nh_info == nh) {
- target_dest = d;
- break;
- }
- }
-
- if (!target_dest) {
- const size_t size_dst = sizeof(struct multipath_dest);
- target_dest = (struct multipath_dest*)
- kmalloc(size_dst, GFP_ATOMIC);
-
- target_dest->nh_info = nh;
- target_dest->network = network;
- target_dest->netmask = netmask;
- target_dest->prefixlen = prefixlen;
- memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
-
- list_add_rcu(&target_dest->list, &target_route->dests);
- }
- /* else: we already stored this info for another destination =>
- * we are finished
- */
-
- spin_unlock_bh(&state[state_idx].lock);
-}
-
-static void __multipath_free(struct rcu_head *head)
-{
- struct multipath_route *rt = container_of(head, struct multipath_route,
- rcu);
- kfree(rt);
-}
-
-static void __multipath_free_dst(struct rcu_head *head)
-{
- struct multipath_dest *dst = container_of(head,
- struct multipath_dest,
- rcu);
- kfree(dst);
-}
-
-static void wrandom_flush(void)
-{
- int i;
-
- /* defere delete to all entries */
- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
- struct multipath_route *r;
-
- spin_lock_bh(&state[i].lock);
- list_for_each_entry_rcu(r, &state[i].head, list) {
- struct multipath_dest *d;
- list_for_each_entry_rcu(d, &r->dests, list) {
- list_del_rcu(&d->list);
- call_rcu(&d->rcu,
- __multipath_free_dst);
- }
- list_del_rcu(&r->list);
- call_rcu(&r->rcu,
- __multipath_free);
- }
-
- spin_unlock_bh(&state[i].lock);
- }
-}
-
-static struct ip_mp_alg_ops wrandom_ops = {
- .mp_alg_select_route = wrandom_select_route,
- .mp_alg_flush = wrandom_flush,
- .mp_alg_set_nhinfo = wrandom_set_nhinfo,
-};
-
-static int __init wrandom_init(void)
-{
- wrandom_init_state();
-
- return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
-}
-
-static void __exit wrandom_exit(void)
-{
- multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
-}
-
-module_init(wrandom_init);
-module_exit(wrandom_exit);
-MODULE_LICENSE("GPL");
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
-#include <net/ip_mp_alg.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
static __inline__ void rt_free(struct rtable *rt)
{
- multipath_remove(rt);
call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}
static __inline__ void rt_drop(struct rtable *rt)
{
- multipath_remove(rt);
ip_rt_put(rt);
call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}
(fl1->iif ^ fl2->iif)) == 0;
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
- struct rtable *expentry,
- int *removed_count)
-{
- int passedexpired = 0;
- struct rtable **nextstep = NULL;
- struct rtable **rthp = chain_head;
- struct rtable *rth;
-
- if (removed_count)
- *removed_count = 0;
-
- while ((rth = *rthp) != NULL) {
- if (rth == expentry)
- passedexpired = 1;
-
- if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 &&
- compare_keys(&(*rthp)->fl, &expentry->fl)) {
- if (*rthp == expentry) {
- *rthp = rth->u.dst.rt_next;
- continue;
- } else {
- *rthp = rth->u.dst.rt_next;
- rt_free(rth);
- if (removed_count)
- ++(*removed_count);
- }
- } else {
- if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
- passedexpired && !nextstep)
- nextstep = &rth->u.dst.rt_next;
-
- rthp = &rth->u.dst.rt_next;
- }
- }
-
- rt_free(expentry);
- if (removed_count)
- ++(*removed_count);
-
- return nextstep;
-}
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-
-
/* This runs via a timer and thus is always in BH context. */
static void rt_check_expire(unsigned long dummy)
{
}
/* Cleanup aged off entries. */
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- /* remove all related balanced entries if necessary */
- if (rth->u.dst.flags & DST_BALANCED) {
- rthp = rt_remove_balanced_route(
- &rt_hash_table[i].chain,
- rth, NULL);
- if (!rthp)
- break;
- } else {
- *rthp = rth->u.dst.rt_next;
- rt_free(rth);
- }
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
*rthp = rth->u.dst.rt_next;
rt_free(rth);
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
}
spin_unlock(rt_hash_lock_addr(i));
if (delay < 0)
delay = ip_rt_min_delay;
- /* flush existing multipath state*/
- multipath_flush();
-
spin_lock_bh(&rt_flush_lock);
if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
rthp = &rth->u.dst.rt_next;
continue;
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- /* remove all related balanced entries
- * if necessary
- */
- if (rth->u.dst.flags & DST_BALANCED) {
- int r;
-
- rthp = rt_remove_balanced_route(
- &rt_hash_table[k].chain,
- rth,
- &r);
- goal -= r;
- if (!rthp)
- break;
- } else {
- *rthp = rth->u.dst.rt_next;
- rt_free(rth);
- goal--;
- }
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
*rthp = rth->u.dst.rt_next;
rt_free(rth);
goal--;
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
}
spin_unlock_bh(rt_hash_lock_addr(k));
if (goal <= 0)
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (!(rth->u.dst.flags & DST_BALANCED) &&
- compare_keys(&rth->fl, &rt->fl)) {
-#else
if (compare_keys(&rth->fl, &rt->fl)) {
-#endif
/* Put it first */
*rthp = rth->u.dst.rt_next;
/*
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (res->fi->fib_nhs > 1)
- rth->u.dst.flags |= DST_BALANCED;
-#endif
if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
rth->u.dst.flags |= DST_NOPOLICY;
if (IN_DEV_CONF_GET(out_dev, NOXFRM))
return err;
}
-static inline int ip_mkroute_input_def(struct sk_buff *skb,
- struct fib_result* res,
- const struct flowi *fl,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+static inline int ip_mkroute_input(struct sk_buff *skb,
+ struct fib_result* res,
+ const struct flowi *fl,
+ struct in_device *in_dev,
+ __be32 daddr, __be32 saddr, u32 tos)
{
struct rtable* rth = NULL;
int err;
return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
}
-static inline int ip_mkroute_input(struct sk_buff *skb,
- struct fib_result* res,
- const struct flowi *fl,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- struct rtable* rth = NULL, *rtres;
- unsigned char hop, hopcount;
- int err = -EINVAL;
- unsigned int hash;
-
- if (res->fi)
- hopcount = res->fi->fib_nhs;
- else
- hopcount = 1;
-
- /* distinguish between multipath and singlepath */
- if (hopcount < 2)
- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
- saddr, tos);
-
- /* add all alternatives to the routing cache */
- for (hop = 0; hop < hopcount; hop++) {
- res->nh_sel = hop;
-
- /* put reference to previous result */
- if (hop)
- ip_rt_put(rtres);
-
- /* create a routing cache entry */
- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
- &rth);
- if (err)
- return err;
-
- /* put it into the cache */
- hash = rt_hash(daddr, saddr, fl->iif);
- err = rt_intern_hash(hash, rth, &rtres);
- if (err)
- return err;
-
- /* forward hop information to multipath impl. */
- multipath_set_nhinfo(rth,
- FIB_RES_NETWORK(*res),
- FIB_RES_NETMASK(*res),
- res->prefixlen,
- &FIB_RES_NH(*res));
- }
- skb->dst = &rtres->u.dst;
- return err;
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-}
-
-
/*
* NOTE. We drop all the packets that has local source
* addresses, because every properly looped back packet
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (res->fi) {
- rth->rt_multipath_alg = res->fi->fib_mp_alg;
- if (res->fi->fib_nhs > 1)
- rth->u.dst.flags |= DST_BALANCED;
- }
-#endif
if (IN_DEV_CONF_GET(in_dev, NOXFRM))
rth->u.dst.flags |= DST_NOXFRM;
if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
return err;
}
-static inline int ip_mkroute_output_def(struct rtable **rp,
- struct fib_result* res,
- const struct flowi *fl,
- const struct flowi *oldflp,
- struct net_device *dev_out,
- unsigned flags)
+static inline int ip_mkroute_output(struct rtable **rp,
+ struct fib_result* res,
+ const struct flowi *fl,
+ const struct flowi *oldflp,
+ struct net_device *dev_out,
+ unsigned flags)
{
struct rtable *rth = NULL;
int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
return err;
}
-static inline int ip_mkroute_output(struct rtable** rp,
- struct fib_result* res,
- const struct flowi *fl,
- const struct flowi *oldflp,
- struct net_device *dev_out,
- unsigned flags)
-{
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- unsigned char hop;
- unsigned hash;
- int err = -EINVAL;
- struct rtable *rth = NULL;
-
- if (res->fi && res->fi->fib_nhs > 1) {
- unsigned char hopcount = res->fi->fib_nhs;
-
- for (hop = 0; hop < hopcount; hop++) {
- struct net_device *dev2nexthop;
-
- res->nh_sel = hop;
-
- /* hold a work reference to the output device */
- dev2nexthop = FIB_RES_DEV(*res);
- dev_hold(dev2nexthop);
-
- /* put reference to previous result */
- if (hop)
- ip_rt_put(*rp);
-
- err = __mkroute_output(&rth, res, fl, oldflp,
- dev2nexthop, flags);
-
- if (err != 0)
- goto cleanup;
-
- hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src,
- oldflp->oif);
- err = rt_intern_hash(hash, rth, rp);
-
- /* forward hop information to multipath impl. */
- multipath_set_nhinfo(rth,
- FIB_RES_NETWORK(*res),
- FIB_RES_NETMASK(*res),
- res->prefixlen,
- &FIB_RES_NH(*res));
- cleanup:
- /* release work reference to output device */
- dev_put(dev2nexthop);
-
- if (err != 0)
- return err;
- }
- return err;
- } else {
- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
- flags);
- }
-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
-#endif
-}
-
/*
* Major route resolver routine.
*/
rth->fl.mark == flp->mark &&
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK))) {
-
- /* check for multipath routes and choose one if
- * necessary
- */
- if (multipath_select_route(flp, rth, rp)) {
- dst_hold(&(*rp)->u.dst);
- RT_CACHE_STAT_INC(out_hit);
- rcu_read_unlock_bh();
- return 0;
- }
-
rth->u.dst.lastuse = jiffies;
dst_hold(&rth->u.dst);
rth->u.dst.__use++;
#ifdef CONFIG_NET_CLS_ROUTE
if (rt->u.dst.tclassid)
NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
-#endif
-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
- if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
- NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
#endif
if (rt->fl.iif)
NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);