ipv4: tcp: get rid of ugly unicast_sock

author Eric Dumazet <edumazet@google.com>

Fri, 30 Jan 2015 05:35:05 +0000 (21:35 -0800)

committer David S. Miller <davem@davemloft.net>

Mon, 2 Feb 2015 07:06:19 +0000 (23:06 -0800)
author Eric Dumazet <edumazet@google.com>
Fri, 30 Jan 2015 05:35:05 +0000 (21:35 -0800)
committer David S. Miller <davem@davemloft.net>
Mon, 2 Feb 2015 07:06:19 +0000 (23:06 -0800)
diff --git a/include/net/ip.h b/include/net/ip.h

index f7cbd703d15d24edca61cf9e159cb1ce3857cb5b..09cf5aebb28368fbb93c974f14a78c3fa9a6f408 100644 (file)
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -181,7 +181,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
         return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
  }
  
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
                            const struct ip_options *sopt,
                            __be32 daddr, __be32 saddr,
                            const struct ip_reply_arg *arg,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h

index 24945cefc4fde6bfaf9c4560080c91b2e3b12d0d..0ffef1a38efcc2f75e78dab09aa5e111f5b8b72f 100644 (file)
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -52,6 +52,7 @@ struct netns_ipv4 {
         struct inet_peer_base   *peers;
         struct tcpm_hash_bucket *tcp_metrics_hash;
         unsigned int            tcp_metrics_hash_log;
+       struct sock  * __percpu *tcp_sk;
         struct netns_frags      frags;
  #ifdef CONFIG_NETFILTER
         struct xt_table         *iptable_filter;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 38a20a9cca1af327618c816e0695e92e8e152bb5..c373c0708d9799b0f17a969412efde7623455dbd 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1506,24 +1506,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
  /*
   *     Generic function to send a packet as reply to another packet.
   *     Used to send some TCP resets/acks so far.
- *
- *     Use a fake percpu inet socket to avoid false sharing and contention.
   */
-static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = {
-       .sk = {
-               .__sk_common = {
-                       .skc_refcnt = ATOMIC_INIT(1),
-               },
-               .sk_wmem_alloc  = ATOMIC_INIT(1),
-               .sk_allocation  = GFP_ATOMIC,
-               .sk_flags       = (1UL << SOCK_USE_WRITE_QUEUE),
-               .sk_pacing_rate = ~0U,
-       },
-       .pmtudisc       = IP_PMTUDISC_WANT,
-       .uc_ttl         = -1,
-};
-
-void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
+void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
                            const struct ip_options *sopt,
                            __be32 daddr, __be32 saddr,
                            const struct ip_reply_arg *arg,
@@ -1533,9 +1517,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
         struct ipcm_cookie ipc;
         struct flowi4 fl4;
         struct rtable *rt = skb_rtable(skb);
+       struct net *net = sock_net(sk);
         struct sk_buff *nskb;
-       struct sock *sk;
-       struct inet_sock *inet;
         int err;
  
         if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
@@ -1566,15 +1549,11 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
         if (IS_ERR(rt))
                 return;
  
-       inet = &get_cpu_var(unicast_sock);
+       inet_sk(sk)->tos = arg->tos;
  
-       inet->tos = arg->tos;
-       sk = &inet->sk;
         sk->sk_priority = skb->priority;
         sk->sk_protocol = ip_hdr(skb)->protocol;
         sk->sk_bound_dev_if = arg->bound_dev_if;
-       sock_net_set(sk, net);
-       __skb_queue_head_init(&sk->sk_write_queue);
         sk->sk_sndbuf = sysctl_wmem_default;
         err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
                              len, 0, &ipc, &rt, MSG_DONTWAIT);
@@ -1590,13 +1569,10 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
                           arg->csumoffset) = csum_fold(csum_add(nskb->csum,
                                                                 arg->csum));
                 nskb->ip_summed = CHECKSUM_NONE;
-               skb_orphan(nskb);
                 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
                 ip_push_pending_frames(sk, &fl4);
         }
  out:
-       put_cpu_var(unicast_sock);
-
         ip_rt_put(rt);
  }
  
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index a3f72d7fc06c07c43e1c00b67970eaee074e4593..d22f54482babf8bbd41972596d01326e4b06f060 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -683,7 +683,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
                 arg.bound_dev_if = sk->sk_bound_dev_if;
  
         arg.tos = ip_hdr(skb)->tos;
-       ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+       ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
+                             skb, &TCP_SKB_CB(skb)->header.h4.opt,
                               ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
                               &arg, arg.iov[0].iov_len);
  
@@ -767,7 +768,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
         if (oif)
                 arg.bound_dev_if = oif;
         arg.tos = tos;
-       ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+       ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
+                             skb, &TCP_SKB_CB(skb)->header.h4.opt,
                               ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
                               &arg, arg.iov[0].iov_len);
  
@@ -2428,14 +2430,39 @@ struct proto tcp_prot = {
  };
  EXPORT_SYMBOL(tcp_prot);
  
+static void __net_exit tcp_sk_exit(struct net *net)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
+       free_percpu(net->ipv4.tcp_sk);
+}
+
  static int __net_init tcp_sk_init(struct net *net)
  {
+       int res, cpu;
+
+       net->ipv4.tcp_sk = alloc_percpu(struct sock *);
+       if (!net->ipv4.tcp_sk)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu) {
+               struct sock *sk;
+
+               res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
+                                          IPPROTO_TCP, net);
+               if (res)
+                       goto fail;
+               *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
+       }
         net->ipv4.sysctl_tcp_ecn = 2;
         return 0;
-}
  
-static void __net_exit tcp_sk_exit(struct net *net)
-{
+fail:
+       tcp_sk_exit(net);
+
+       return res;
  }
  
  static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
author	Eric Dumazet <edumazet@google.com>
	Fri, 30 Jan 2015 05:35:05 +0000 (21:35 -0800)
committer	David S. Miller <davem@davemloft.net>
	Mon, 2 Feb 2015 07:06:19 +0000 (23:06 -0800)
include/net/ip.h		patch | blob | history
include/net/netns/ipv4.h		patch | blob | history
net/ipv4/ip_output.c		patch | blob | history
net/ipv4/tcp_ipv4.c		patch | blob | history