ipv4: introduce hardened ip_no_pmtu_disc mode
authorHannes Frederic Sowa <hannes@stressinduktion.org>
Thu, 9 Jan 2014 09:01:17 +0000 (10:01 +0100)
committerDavid S. Miller <davem@davemloft.net>
Mon, 13 Jan 2014 19:22:55 +0000 (11:22 -0800)
This new ip_no_pmtu_disc mode only allows fragmentation-needed errors
to be honored by protocols which do more stringent validation on the
ICMP packet's payload. This knob is useful for people who e.g. want to
run an unmodified DNS server in a namespace where they need to use pmtu
for TCP connections (as they are used for zone transfers or fallback
for requests) but don't want to use possibly spoofed UDP pmtu information.

Currently the whitelisted protocols are TCP, SCTP and DCCP as they check
if the returned packet is in the window or if the association is valid.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: John Heffner <johnwheffner@gmail.com>
Suggested-by: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/ip-sysctl.txt
include/net/protocol.h
net/dccp/ipv4.c
net/ipv4/af_inet.c
net/ipv4/icmp.c
net/sctp/protocol.c

index 0d71fa962d8ae3899fc6720a799e49734fa1f738..c97932c88ea3a39b8547d67df667061c9b677a30 100644 (file)
@@ -26,7 +26,18 @@ ip_no_pmtu_disc - INTEGER
        discarded. Outgoing frames are handled the same as in mode 1,
        implicitly setting IP_PMTUDISC_DONT on every created socket.
 
-       Possible values: 0-2
+       Mode 3 is a hardened pmtu discovery mode. The kernel will only
+       accept fragmentation-needed errors if the underlying protocol
+       can verify them besides a plain socket lookup. Current
+       protocols for which pmtu events will be honored are TCP, SCTP
+       and DCCP as they verify e.g. the sequence number or the
+       association. This mode should not be enabled globally but is
+       only intended to secure e.g. name servers in namespaces where
+       TCP path mtu must still work but path MTU information of other
+       protocols should be discarded. If enabled globally this mode
+       could break other protocols.
+
+       Possible values: 0-3
        Default: FALSE
 
 min_pmtu - INTEGER
index fbf7676c9a02e352890b66d0bc3caf1775dbb199..0e5f8665d7fbf651746ba46727eeda0365be6a47 100644 (file)
@@ -43,7 +43,12 @@ struct net_protocol {
        int                     (*handler)(struct sk_buff *skb);
        void                    (*err_handler)(struct sk_buff *skb, u32 info);
        unsigned int            no_policy:1,
-                               netns_ok:1;
+                               netns_ok:1,
+                               /* does the protocol do more stringent
+                                * icmp tag validation than simple
+                                * socket lookup?
+                                */
+                               icmp_strict_tag_validation:1;
 };
 
 #if IS_ENABLED(CONFIG_IPV6)
index 88299c29101db7f8b52dba800ec19087b495f7dd..22b5d818b2001b177b765cbb67eb2551e87502ce 100644 (file)
@@ -989,6 +989,7 @@ static const struct net_protocol dccp_v4_protocol = {
        .err_handler    = dccp_v4_err,
        .no_policy      = 1,
        .netns_ok       = 1,
+       .icmp_strict_tag_validation = 1,
 };
 
 static const struct proto_ops inet_dccp_ops = {
index 6268a4751e641a764701d22ca9e717bbe31fae2c..ecd2c3f245ce2b2e0b79f17417c5e6ad8c70abf6 100644 (file)
@@ -1545,6 +1545,7 @@ static const struct net_protocol tcp_protocol = {
        .err_handler    =       tcp_v4_err,
        .no_policy      =       1,
        .netns_ok       =       1,
+       .icmp_strict_tag_validation = 1,
 };
 
 static const struct net_protocol udp_protocol = {
index fb3c5637199dbfacd7b7b5c40ae14b1b2880d178..0134663fdbce86f6da39d8f6c9d27ce6c404687c 100644 (file)
@@ -668,6 +668,16 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
        rcu_read_unlock();
 }
 
+/*
+ *     Report whether the transport protocol registered for @proto sets
+ *     icmp_strict_tag_validation, i.e. validates ICMP errors more
+ *     strictly than a simple socket lookup.
+ *
+ *     @proto is taken from the IP header embedded in a received ICMP
+ *     error, so it may name a protocol with no registered handler.
+ *     Such a protocol is reported as not validating (false) so the
+ *     caller drops the error instead of dereferencing an empty slot.
+ */
+static bool icmp_tag_validation(int proto)
+{
+       const struct net_protocol *ipprot;
+       bool ok;
+
+       rcu_read_lock();
+       /* the slot may be NULL when nothing is registered for proto */
+       ipprot = rcu_dereference(inet_protos[proto]);
+       ok = ipprot ? ipprot->icmp_strict_tag_validation : false;
+       rcu_read_unlock();
+       return ok;
+}
+
 /*
  *     Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
  *     ICMP_PARAMETERPROB.
@@ -705,12 +715,22 @@ static void icmp_unreach(struct sk_buff *skb)
                case ICMP_PORT_UNREACH:
                        break;
                case ICMP_FRAG_NEEDED:
-                       if (net->ipv4.sysctl_ip_no_pmtu_disc == 2) {
-                               goto out;
-                       } else if (net->ipv4.sysctl_ip_no_pmtu_disc) {
+                       /* for documentation of the ip_no_pmtu_disc
+                        * values please see
+                        * Documentation/networking/ip-sysctl.txt
+                        */
+                       switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+                       default:
                                LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
                                               &iph->daddr);
-                       } else {
+                               break;
+                       case 2:
+                               goto out;
+                       case 3:
+                               if (!icmp_tag_validation(iph->protocol))
+                                       goto out;
+                               /* fall through */
+                       case 0:
                                info = ntohs(icmph->un.frag.mtu);
                                if (!info)
                                        goto out;
index 34b7726bcd7ff472da4603fea6010bc39f467440..7c161084f2414b61abbf10a35937d15e460d9f66 100644 (file)
@@ -1030,6 +1030,7 @@ static const struct net_protocol sctp_protocol = {
        .err_handler = sctp_v4_err,
        .no_policy   = 1,
        .netns_ok    = 1,
+       .icmp_strict_tag_validation = 1,
 };
 
 /* IPv4 address related functions.  */