tuntap: accept an array of XDP buffs through sendmsg()
authorJason Wang <jasowang@redhat.com>
Wed, 12 Sep 2018 03:17:07 +0000 (11:17 +0800)
committerDavid S. Miller <davem@davemloft.net>
Thu, 13 Sep 2018 16:25:40 +0000 (09:25 -0700)
This patch implements the TUN_MSG_PTR msg_control type. This type allows
the caller to pass an array of XDP buffs to tuntap through the ptr field
of tun_msg_control. If an XDP program is attached, tuntap can run the
XDP program directly. If not, tuntap will build an skb and do a fast
receive, since part of the work has already been done by vhost_net.

This avoids lots of indirect calls, thus improving icache
utilization, and allows XDP flushing to be batched when doing XDP
redirection.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/tun.c

index 89779b58c7cabb06811b78ab657d938d7aa2da44..2a2cd35853b769d95eb9cc9e0bdb2652fac2b7ee 100644 (file)
@@ -2426,22 +2426,133 @@ static void tun_sock_write_space(struct sock *sk)
        kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
 }
 
+/* Process one XDP buff handed in from vhost_net via sendmsg().
+ *
+ * If an XDP program is attached it is run natively on the buff; on
+ * XDP_PASS (or when no program is attached) an skb is built from the
+ * buff and pushed into the stack.  *flush is set when the program
+ * returned XDP_REDIRECT so the caller can issue a single
+ * xdp_do_flush_map() for the whole batch.
+ *
+ * Caller must hold rcu_read_lock() with BHs disabled (see
+ * tun_sendmsg()).  Returns 0 on success or a negative errno.
+ */
+static int tun_xdp_one(struct tun_struct *tun,
+                      struct tun_file *tfile,
+                      struct xdp_buff *xdp, int *flush)
+{
+       struct tun_xdp_hdr *hdr = xdp->data_hard_start;
+       struct virtio_net_hdr *gso = &hdr->gso;
+       struct tun_pcpu_stats *stats;
+       struct bpf_prog *xdp_prog;
+       struct sk_buff *skb = NULL;
+       u32 rxhash = 0, act;
+       int buflen = hdr->buflen;
+       int err = 0;
+       bool skb_xdp = false;
+
+       xdp_prog = rcu_dereference(tun->xdp_prog);
+       if (xdp_prog) {
+               /* GSO packets are not run through the native XDP path;
+                * build an skb and run generic XDP (do_xdp_generic())
+                * on it below instead.
+                */
+               if (gso->gso_type) {
+                       skb_xdp = true;
+                       goto build;
+               }
+               xdp_set_data_meta_invalid(xdp);
+               xdp->rxq = &tfile->xdp_rxq;
+
+               act = bpf_prog_run_xdp(xdp_prog, xdp);
+               err = tun_xdp_act(tun, xdp_prog, xdp, act);
+               if (err < 0) {
+                       /* tun_xdp_act() failed: release the buff's page
+                        * and propagate the error.
+                        */
+                       put_page(virt_to_head_page(xdp->data));
+                       return err;
+               }
+
+               /* On success tun_xdp_act() returns the XDP verdict. */
+               switch (err) {
+               case XDP_REDIRECT:
+                       /* Redirected; ask the caller to flush the maps
+                        * once for the whole batch.
+                        */
+                       *flush = true;
+                       /* fall through */
+               case XDP_TX:
+                       /* Buff consumed by the redirect/transmit path. */
+                       return 0;
+               case XDP_PASS:
+                       break;
+               default:
+                       /* Any other verdict: drop the buff and release
+                        * its page.
+                        */
+                       put_page(virt_to_head_page(xdp->data));
+                       return 0;
+               }
+       }
+
+build:
+       /* On success the skb takes ownership of the page; the
+        * kfree_skb() below releases it on the error path.
+        * NOTE(review): on build_skb() failure the page does not appear
+        * to be released here — confirm against the caller's error
+        * handling.
+        */
+       skb = build_skb(xdp->data_hard_start, buflen);
+       if (!skb) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       skb_reserve(skb, xdp->data - xdp->data_hard_start);
+       skb_put(skb, xdp->data_end - xdp->data);
+
+       if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
+               this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
+               kfree_skb(skb);
+               err = -EINVAL;
+               goto out;
+       }
+
+       skb->protocol = eth_type_trans(skb, tun->dev);
+       skb_reset_network_header(skb);
+       skb_probe_transport_header(skb, 0);
+
+       if (skb_xdp) {
+               /* Generic XDP path for the GSO packets skipped above. */
+               err = do_xdp_generic(xdp_prog, skb);
+               if (err != XDP_PASS)
+                       goto out;
+       }
+
+       /* Only compute the symmetric flow hash (for tun_flow_update()
+        * below) when no steering eBPF program controls queue
+        * selection.
+        */
+       if (!rcu_dereference(tun->steering_prog))
+               rxhash = __skb_get_hash_symmetric(skb);
+
+       netif_receive_skb(skb);
+
+       stats = get_cpu_ptr(tun->pcpu_stats);
+       u64_stats_update_begin(&stats->syncp);
+       stats->rx_packets++;
+       stats->rx_bytes += skb->len;
+       u64_stats_update_end(&stats->syncp);
+       put_cpu_ptr(stats);
+
+       if (rxhash)
+               tun_flow_update(tun, rxhash, tfile);
+
+out:
+       return err;
+}
+
 static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
-       int ret;
+       int ret, i;
        struct tun_file *tfile = container_of(sock, struct tun_file, socket);
        struct tun_struct *tun = tun_get(tfile);
        struct tun_msg_ctl *ctl = m->msg_control;
+       struct xdp_buff *xdp;
 
        if (!tun)
                return -EBADFD;
 
-       if (ctl && ctl->type != TUN_MSG_UBUF)
-               return -EINVAL;
+       /* TUN_MSG_PTR: ctl->ptr points at an array of ctl->num
+        * xdp_buffs built by the caller (vhost_net); process them as
+        * one batch here instead of going through tun_get_user().
+        */
+       if (ctl && (ctl->type == TUN_MSG_PTR)) {
+               int n = ctl->num;
+               int flush = 0;
+
+               /* tun_xdp_one() expects to run under rcu_read_lock()
+                * with BHs disabled.
+                */
+               local_bh_disable();
+               rcu_read_lock();
+
+               for (i = 0; i < n; i++) {
+                       xdp = &((struct xdp_buff *)ctl->ptr)[i];
+                       /* NOTE(review): per-buff errors from
+                        * tun_xdp_one() are discarded; the batch below
+                        * always reports success (total_len).
+                        */
+                       tun_xdp_one(tun, tfile, xdp, &flush);
+               }
+
+               /* Single map flush for any XDP_REDIRECT in the batch. */
+               if (flush)
+                       xdp_do_flush_map();
+
+               rcu_read_unlock();
+               local_bh_enable();
+
+               ret = total_len;
+               goto out;
+       }
 
        ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
                           m->msg_flags & MSG_DONTWAIT,
                           m->msg_flags & MSG_MORE);
+out:
        tun_put(tun);
        return ret;
 }