vhost: switch to use new message format
authorJason Wang <jasowang@redhat.com>
Mon, 6 Aug 2018 03:17:47 +0000 (11:17 +0800)
committerDavid S. Miller <davem@davemloft.net>
Mon, 6 Aug 2018 17:41:04 +0000 (10:41 -0700)
We use to have message like:

struct vhost_msg {
int type;
union {
struct vhost_iotlb_msg iotlb;
__u8 padding[64];
};
};

Unfortunately, there will be a hole of 32bit in 64bit machine because
of the alignment. This leads a different formats between 32bit API and
64bit API. What's more it will break 32bit program running on 64bit
machine.

So fixing this by introducing a new message type with an explicit
32bit reserved field after type like:

struct vhost_msg_v2 {
__u32 type;
__u32 reserved;
union {
struct vhost_iotlb_msg iotlb;
__u8 padding[64];
};
};

We will have a consistent ABI after switching to use this. To enable
this capability, introduce a new ioctl (VHOST_SET_BAKCEND_FEATURE) for
userspace to enable this feature (VHOST_BACKEND_F_IOTLB_V2).

Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/vhost/net.c
drivers/vhost/vhost.c
drivers/vhost/vhost.h
include/uapi/linux/vhost.h

index 367d8023b54dbabddd526d08cec66fa29bd4f36d..4e656f89cb225c83b42f579d1bc1f988224d49bc 100644 (file)
@@ -77,6 +77,10 @@ enum {
                         (1ULL << VIRTIO_F_IOMMU_PLATFORM)
 };
 
+enum {
+       VHOST_NET_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
+};
+
 enum {
        VHOST_NET_VQ_RX = 0,
        VHOST_NET_VQ_TX = 1,
@@ -1399,6 +1403,21 @@ done:
        return err;
 }
 
+static int vhost_net_set_backend_features(struct vhost_net *n, u64 features)
+{
+       int i;
+
+       mutex_lock(&n->dev.mutex);
+       for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+               mutex_lock(&n->vqs[i].vq.mutex);
+               n->vqs[i].vq.acked_backend_features = features;
+               mutex_unlock(&n->vqs[i].vq.mutex);
+       }
+       mutex_unlock(&n->dev.mutex);
+
+       return 0;
+}
+
 static int vhost_net_set_features(struct vhost_net *n, u64 features)
 {
        size_t vhost_hlen, sock_hlen, hdr_len;
@@ -1489,6 +1508,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
                if (features & ~VHOST_NET_FEATURES)
                        return -EOPNOTSUPP;
                return vhost_net_set_features(n, features);
+       case VHOST_GET_BACKEND_FEATURES:
+               features = VHOST_NET_BACKEND_FEATURES;
+               if (copy_to_user(featurep, &features, sizeof(features)))
+                       return -EFAULT;
+               return 0;
+       case VHOST_SET_BACKEND_FEATURES:
+               if (copy_from_user(&features, featurep, sizeof(features)))
+                       return -EFAULT;
+               if (features & ~VHOST_NET_BACKEND_FEATURES)
+                       return -EOPNOTSUPP;
+               return vhost_net_set_backend_features(n, features);
        case VHOST_RESET_OWNER:
                return vhost_net_reset_owner(n);
        case VHOST_SET_OWNER:
index a502f1af4a213607adec4aa28fa6ae8eb9ce0389..6f6c42d5e4be6163c3ec91e1deb073240d44f205 100644 (file)
@@ -315,6 +315,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
        vq->log_addr = -1ull;
        vq->private_data = NULL;
        vq->acked_features = 0;
+       vq->acked_backend_features = 0;
        vq->log_base = NULL;
        vq->error_ctx = NULL;
        vq->kick = NULL;
@@ -1027,28 +1028,40 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
                             struct iov_iter *from)
 {
-       struct vhost_msg_node node;
-       unsigned size = sizeof(struct vhost_msg);
-       size_t ret;
-       int err;
+       struct vhost_iotlb_msg msg;
+       size_t offset;
+       int type, ret;
 
-       if (iov_iter_count(from) < size)
-               return 0;
-       ret = copy_from_iter(&node.msg, size, from);
-       if (ret != size)
+       ret = copy_from_iter(&type, sizeof(type), from);
+       if (ret != sizeof(type))
                goto done;
 
-       switch (node.msg.type) {
+       switch (type) {
        case VHOST_IOTLB_MSG:
-               err = vhost_process_iotlb_msg(dev, &node.msg.iotlb);
-               if (err)
-                       ret = err;
+               /* There maybe a hole after type for V1 message type,
+                * so skip it here.
+                */
+               offset = offsetof(struct vhost_msg, iotlb) - sizeof(int);
+               break;
+       case VHOST_IOTLB_MSG_V2:
+               offset = sizeof(__u32);
                break;
        default:
                ret = -EINVAL;
-               break;
+               goto done;
+       }
+
+       iov_iter_advance(from, offset);
+       ret = copy_from_iter(&msg, sizeof(msg), from);
+       if (ret != sizeof(msg))
+               goto done;
+       if (vhost_process_iotlb_msg(dev, &msg)) {
+               ret = -EFAULT;
+               goto done;
        }
 
+       ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) :
+             sizeof(struct vhost_msg_v2);
 done:
        return ret;
 }
@@ -1107,13 +1120,28 @@ ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
                finish_wait(&dev->wait, &wait);
 
        if (node) {
-               ret = copy_to_iter(&node->msg, size, to);
+               struct vhost_iotlb_msg *msg;
+               void *start = &node->msg;
+
+               switch (node->msg.type) {
+               case VHOST_IOTLB_MSG:
+                       size = sizeof(node->msg);
+                       msg = &node->msg.iotlb;
+                       break;
+               case VHOST_IOTLB_MSG_V2:
+                       size = sizeof(node->msg_v2);
+                       msg = &node->msg_v2.iotlb;
+                       break;
+               default:
+                       BUG();
+                       break;
+               }
 
-               if (ret != size || node->msg.type != VHOST_IOTLB_MISS) {
+               ret = copy_to_iter(start, size, to);
+               if (ret != size || msg->type != VHOST_IOTLB_MISS) {
                        kfree(node);
                        return ret;
                }
-
                vhost_enqueue_msg(dev, &dev->pending_list, node);
        }
 
@@ -1126,12 +1154,19 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
        struct vhost_dev *dev = vq->dev;
        struct vhost_msg_node *node;
        struct vhost_iotlb_msg *msg;
+       bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2);
 
-       node = vhost_new_msg(vq, VHOST_IOTLB_MISS);
+       node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG);
        if (!node)
                return -ENOMEM;
 
-       msg = &node->msg.iotlb;
+       if (v2) {
+               node->msg_v2.type = VHOST_IOTLB_MSG_V2;
+               msg = &node->msg_v2.iotlb;
+       } else {
+               msg = &node->msg.iotlb;
+       }
+
        msg->type = VHOST_IOTLB_MISS;
        msg->iova = iova;
        msg->perm = access;
index 6c844b90a1687317792bb7687a9955d7436bf572..466ef75422916c020fcb26d27cf812bd99f99284 100644 (file)
@@ -132,6 +132,7 @@ struct vhost_virtqueue {
        struct vhost_umem *iotlb;
        void *private_data;
        u64 acked_features;
+       u64 acked_backend_features;
        /* Log write descriptors */
        void __user *log_base;
        struct vhost_log *log;
@@ -147,7 +148,10 @@ struct vhost_virtqueue {
 };
 
 struct vhost_msg_node {
-  struct vhost_msg msg;
+  union {
+         struct vhost_msg msg;
+         struct vhost_msg_v2 msg_v2;
+  };
   struct vhost_virtqueue *vq;
   struct list_head node;
 };
@@ -238,6 +242,11 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
        return vq->acked_features & (1ULL << bit);
 }
 
+static inline bool vhost_backend_has_feature(struct vhost_virtqueue *vq, int bit)
+{
+       return vq->acked_backend_features & (1ULL << bit);
+}
+
 #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
 static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
 {
index c51f8e5cc6080c03028727cdb81343fc4cabb6e1..b1e22c40c4b68a0b39d0a8a567baea1f3b5403ed 100644 (file)
@@ -65,6 +65,7 @@ struct vhost_iotlb_msg {
 };
 
 #define VHOST_IOTLB_MSG 0x1
+#define VHOST_IOTLB_MSG_V2 0x2
 
 struct vhost_msg {
        int type;
@@ -74,6 +75,15 @@ struct vhost_msg {
        };
 };
 
+struct vhost_msg_v2 {
+       __u32 type;
+       __u32 reserved;
+       union {
+               struct vhost_iotlb_msg iotlb;
+               __u8 padding[64];
+       };
+};
+
 struct vhost_memory_region {
        __u64 guest_phys_addr;
        __u64 memory_size; /* bytes */
@@ -160,6 +170,14 @@ struct vhost_memory {
 #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24,      \
                                         struct vhost_vring_state)
 
+/* Set or get vhost backend capability */
+
+/* Use message type V2 */
+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+
+#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
+#define VHOST_GET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x26, __u64)
+
 /* VHOST_NET specific defines */
 
 /* Attach virtio net ring to a raw socket, or tap device.