ipv4: use separate genid for next hop exceptions
author: Timo Teräs <timo.teras@iki.fi>
Mon, 27 May 2013 20:46:33 +0000 (20:46 +0000)
committer: David S. Miller <davem@davemloft.net>
Mon, 3 Jun 2013 07:07:43 +0000 (00:07 -0700)
Commit 13d82bf5 (ipv4: Fix flushing of cached routing informations)
added support for flushing learned pmtu information.

However, using rt_genid for this is quite heavy-handed, as it is bumped
on route add/change and multicast events, amongst other places. These can
happen quite often, especially when dynamic routing protocols are in use.

While this is OK for routes (as they are just recreated locally),
the pmtu information is learned from remote systems and the ICMP
notification can arrive after long delays. It is worthwhile to have a
separate genid to avoid excessive pmtu resets.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/ip_fib.h
include/net/net_namespace.h
net/ipv4/route.c

index e49db91593a953422b0ce0a15c0eb0902ee33c56..44424e9dab2a7c74cdd58930bd3efa0ddb6be952 100644 (file)
@@ -51,6 +51,7 @@ struct rtable;
 
 struct fib_nh_exception {
        struct fib_nh_exception __rcu   *fnhe_next;
+       int                             fnhe_genid;
        __be32                          fnhe_daddr;
        u32                             fnhe_pmtu;
        __be32                          fnhe_gw;
index b176978274828206b784e7003e04c871bef582a3..495bc57f292caa6deba113f7fd6ffb5cfd8e3668 100644 (file)
@@ -118,6 +118,7 @@ struct net {
        struct netns_ipvs       *ipvs;
        struct sock             *diag_nlsk;
        atomic_t                rt_genid;
+       atomic_t                fnhe_genid;
 };
 
 /*
@@ -340,4 +341,14 @@ static inline void rt_genid_bump(struct net *net)
        atomic_inc(&net->rt_genid);
 }
 
+static inline int fnhe_genid(struct net *net)
+{
+       return atomic_read(&net->fnhe_genid);
+}
+
+static inline void fnhe_genid_bump(struct net *net)
+{
+       atomic_inc(&net->fnhe_genid);
+}
+
 #endif /* __NET_NET_NAMESPACE_H */
index a4082be1b9b476b9315b4a013cd49c7fd9de0d73..403e283028690a173b112f1600ef2ecf1b2f1550 100644 (file)
@@ -658,6 +658,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                        fnhe->fnhe_next = hash->chain;
                        rcu_assign_pointer(hash->chain, fnhe);
                }
+               fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
                fnhe->fnhe_daddr = daddr;
                fnhe->fnhe_gw = gw;
                fnhe->fnhe_pmtu = pmtu;
@@ -1236,8 +1237,11 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
        spin_lock_bh(&fnhe_lock);
 
        if (daddr == fnhe->fnhe_daddr) {
+               int genid = fnhe_genid(dev_net(rt->dst.dev));
                struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
-               if (orig && rt_is_expired(orig)) {
+
+               if (fnhe->fnhe_genid != genid) {
+                       fnhe->fnhe_genid = genid;
                        fnhe->fnhe_gw = 0;
                        fnhe->fnhe_pmtu = 0;
                        fnhe->fnhe_expires = 0;
@@ -2443,8 +2447,11 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
                                        void __user *buffer,
                                        size_t *lenp, loff_t *ppos)
 {
+       struct net *net = (struct net *)__ctl->extra1;
+
        if (write) {
-               rt_cache_flush((struct net *)__ctl->extra1);
+               rt_cache_flush(net);
+               fnhe_genid_bump(net);
                return 0;
        }
 
@@ -2619,6 +2626,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 static __net_init int rt_genid_init(struct net *net)
 {
        atomic_set(&net->rt_genid, 0);
+       atomic_set(&net->fnhe_genid, 0);
        get_random_bytes(&net->ipv4.dev_addr_genid,
                         sizeof(net->ipv4.dev_addr_genid));
        return 0;