ipv6: Add support for non-equal-cost multipath
author Ido Schimmel <idosch@mellanox.com>
Tue, 9 Jan 2018 14:40:28 +0000 (16:40 +0200)
committer David S. Miller <davem@davemloft.net>
Wed, 10 Jan 2018 20:14:44 +0000 (15:14 -0500)
The use of hash-threshold instead of modulo-N makes it trivial to add
support for non-equal-cost multipath.

Instead of dividing the multipath hash function's output space equally
between the nexthops, each nexthop is assigned a region size which is
proportional to its weight.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/ip6_fib.h
net/ipv6/route.c

index 97cd05d87780707a6f33b65e42241da47464215a..34ec321d6a03baaf1efdfe591a5ca1f27b1d276c 100644 (file)
@@ -171,6 +171,7 @@ struct rt6_info {
        u32                             rt6i_metric;
        u32                             rt6i_pmtu;
        /* more non-fragment space at head required */
+       int                             rt6i_nh_weight;
        unsigned short                  rt6i_nfheader_len;
        u8                              rt6i_protocol;
        u8                              exception_bucket_flushed:1,
index 7837b8c754a303e467f5be40a10b9e090146fdc0..1076ae0ea9d57594bdac8a871570c53efb2afbdb 100644 (file)
@@ -2594,6 +2594,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 #endif
 
        rt->rt6i_metric = cfg->fc_metric;
+       rt->rt6i_nh_weight = 1;
 
        /* We cannot add true routes via loopback here,
           they would result in kernel looping; promote them to reject routes
@@ -3507,11 +3508,11 @@ static int rt6_multipath_total_weight(const struct rt6_info *rt)
        int total = 0;
 
        if (!rt6_is_dead(rt))
-               total++;
+               total += rt->rt6i_nh_weight;
 
        list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
                if (!rt6_is_dead(iter))
-                       total++;
+                       total += iter->rt6i_nh_weight;
        }
 
        return total;
@@ -3522,7 +3523,7 @@ static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
        int upper_bound = -1;
 
        if (!rt6_is_dead(rt)) {
-               (*weight)++;
+               *weight += rt->rt6i_nh_weight;
                upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
                                                    total) - 1;
        }
@@ -4024,6 +4025,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
                        goto cleanup;
                }
 
+               rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+
                err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
                if (err) {
                        dst_release_immediate(&rt->dst);
@@ -4246,7 +4249,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
        if (!rtnh)
                goto nla_put_failure;
 
-       rtnh->rtnh_hops = 0;
+       rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
        rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
 
        if (rt6_nexthop_info(skb, rt, &flags, true) < 0)