ipv4: properly refresh rtable entries on pmtu/redirect events
author Timo Teräs <timo.teras@iki.fi>
Mon, 27 May 2013 20:46:31 +0000 (20:46 +0000)
committer David S. Miller <davem@davemloft.net>
Mon, 3 Jun 2013 07:07:42 +0000 (00:07 -0700)
This reverts commit 05ab86c5 (xfrm4: Invalidate all ipv4 routes on
IPsec pmtu events). Flushing all cached entries is not needed.

Instead, invalidate only the related next hop dsts to recheck for
the added next hop exception where needed. This also fixes a subtle
race due to bumping generation id's before updating the pmtu.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/ah4.c
net/ipv4/esp4.c
net/ipv4/ipcomp.c
net/ipv4/route.c

index 2e7f1948216fe8ade7d57aa3baa652d14d6b9b7b..717902669d2f2ef34714c215c4695ffe59ddd283 100644 (file)
@@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info)
        if (!x)
                return;
 
-       if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
-               atomic_inc(&flow_cache_genid);
-               rt_genid_bump(net);
-
+       if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
                ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
-       else
+       else
                ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
        xfrm_state_put(x);
 }
index 4cfe34d4cc967a94ed15f2deef3d249d2429e846..ab3d814bc80af8f377da971af189a49ae4f2f094 100644 (file)
@@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info)
        if (!x)
                return;
 
-       if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
-               atomic_inc(&flow_cache_genid);
-               rt_genid_bump(net);
-
+       if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
                ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
-       else
+       else
                ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
        xfrm_state_put(x);
 }
index 59cb8c7690561f3fc44ecc382557408af53cb5fb..826be4cb482a29b401f2314da6581e1127a7a731 100644 (file)
@@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
        if (!x)
                return;
 
-       if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
-               atomic_inc(&flow_cache_genid);
-               rt_genid_bump(net);
-
+       if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
                ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
-       else
+       else
                ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
        xfrm_state_put(x);
 }
index 550781a17b34f75f94b919a39120eb119db106a1..561a37833d86c3d3d13c002e4e7a18005f97b751 100644 (file)
@@ -594,11 +594,25 @@ static inline u32 fnhe_hashfun(__be32 daddr)
        return hval & (FNHE_HASH_SIZE - 1);
 }
 
+static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
+{
+       rt->rt_pmtu = fnhe->fnhe_pmtu;
+       rt->dst.expires = fnhe->fnhe_expires;
+
+       if (fnhe->fnhe_gw) {
+               rt->rt_flags |= RTCF_REDIRECTED;
+               rt->rt_gateway = fnhe->fnhe_gw;
+               rt->rt_uses_gateway = 1;
+       }
+}
+
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                                  u32 pmtu, unsigned long expires)
 {
        struct fnhe_hash_bucket *hash;
        struct fib_nh_exception *fnhe;
+       struct rtable *rt;
+       unsigned int i;
        int depth;
        u32 hval = fnhe_hashfun(daddr);
 
@@ -627,8 +641,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                        fnhe->fnhe_gw = gw;
                if (pmtu) {
                        fnhe->fnhe_pmtu = pmtu;
-                       fnhe->fnhe_expires = expires;
+                       fnhe->fnhe_expires = max(1UL, expires);
                }
+               /* Update all cached dsts too */
+               rt = rcu_dereference(fnhe->fnhe_rth);
+               if (rt)
+                       fill_route_from_fnhe(rt, fnhe);
        } else {
                if (depth > FNHE_RECLAIM_DEPTH)
                        fnhe = fnhe_oldest(hash);
@@ -644,6 +662,18 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                fnhe->fnhe_gw = gw;
                fnhe->fnhe_pmtu = pmtu;
                fnhe->fnhe_expires = expires;
+
+               /* Exception created; mark the cached routes for the nexthop
+                * stale, so anyone caching it rechecks if this exception
+                * applies to them.
+                */
+               for_each_possible_cpu(i) {
+                       struct rtable __rcu **prt;
+                       prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
+                       rt = rcu_dereference(*prt);
+                       if (rt)
+                               rt->dst.obsolete = DST_OBSOLETE_KILL;
+               }
        }
 
        fnhe->fnhe_stamp = jiffies;
@@ -917,13 +947,6 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
        if (mtu < ip_rt_min_pmtu)
                mtu = ip_rt_min_pmtu;
 
-       if (!rt->rt_pmtu) {
-               dst->obsolete = DST_OBSOLETE_KILL;
-       } else {
-               rt->rt_pmtu = mtu;
-               dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
-       }
-
        rcu_read_lock();
        if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
                struct fib_nh *nh = &FIB_RES_NH(res);
@@ -1063,11 +1086,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
         * into this function always.
         *
-        * When a PMTU/redirect information update invalidates a
-        * route, this is indicated by setting obsolete to
-        * DST_OBSOLETE_KILL.
+        * When a PMTU/redirect information update invalidates a route,
+        * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
+        * DST_OBSOLETE_DEAD by dst_free().
         */
-       if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
+       if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
                return NULL;
        return dst;
 }
@@ -1215,20 +1238,8 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
                        fnhe->fnhe_pmtu = 0;
                        fnhe->fnhe_expires = 0;
                }
-               if (fnhe->fnhe_pmtu) {
-                       unsigned long expires = fnhe->fnhe_expires;
-                       unsigned long diff = expires - jiffies;
-
-                       if (time_before(jiffies, expires)) {
-                               rt->rt_pmtu = fnhe->fnhe_pmtu;
-                               dst_set_expires(&rt->dst, diff);
-                       }
-               }
-               if (fnhe->fnhe_gw) {
-                       rt->rt_flags |= RTCF_REDIRECTED;
-                       rt->rt_gateway = fnhe->fnhe_gw;
-                       rt->rt_uses_gateway = 1;
-               } else if (!rt->rt_gateway)
+               fill_route_from_fnhe(rt, fnhe);
+               if (!rt->rt_gateway)
                        rt->rt_gateway = daddr;
 
                rcu_assign_pointer(fnhe->fnhe_rth, rt);