io_uring: enable option to only trigger eventfd for async completions
authorJens Axboe <axboe@kernel.dk>
Wed, 8 Jan 2020 18:04:00 +0000 (11:04 -0700)
committerJens Axboe <axboe@kernel.dk>
Tue, 21 Jan 2020 00:04:04 +0000 (17:04 -0700)
If an application is using eventfd notifications with poll to know when
new SQEs can be issued, it's expecting the following read/writes to
complete inline. And with that, it knows that there are events available,
and doesn't want spurious wakeups on the eventfd for those requests.

This adds IORING_REGISTER_EVENTFD_ASYNC, which works just like
IORING_REGISTER_EVENTFD, except it only triggers notifications for events
that happen from async completions (IRQ, or io-wq worker completions).
Any completions inline from the submission itself will not trigger
notifications.

Suggested-by: Mark Papadakis <markuspapadakis@icloud.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/uapi/linux/io_uring.h

index 42bf83b3fbd547c36f780cb235e299f9f5156b43..70656762244fedf99dd69ec583066bc2c0a97ee2 100644 (file)
@@ -206,6 +206,7 @@ struct io_ring_ctx {
                int                     account_mem: 1;
                int                     cq_overflow_flushed: 1;
                int                     drain_next: 1;
+               int                     eventfd_async: 1;
 
                /*
                 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -963,13 +964,20 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
        return &rings->cqes[tail & ctx->cq_mask];
 }
 
+static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx)
+{
+       if (!ctx->eventfd_async)
+               return true;
+       return io_wq_current_is_worker() || in_interrupt();
+}
+
 static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 {
        if (waitqueue_active(&ctx->wait))
                wake_up(&ctx->wait);
        if (waitqueue_active(&ctx->sqo_wait))
                wake_up(&ctx->sqo_wait);
-       if (ctx->cq_ev_fd)
+       if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx))
                eventfd_signal(ctx->cq_ev_fd, 1);
 }
 
@@ -6556,10 +6564,17 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                ret = io_sqe_files_update(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_EVENTFD:
+       case IORING_REGISTER_EVENTFD_ASYNC:
                ret = -EINVAL;
                if (nr_args != 1)
                        break;
                ret = io_eventfd_register(ctx, arg);
+               if (ret)
+                       break;
+               if (opcode == IORING_REGISTER_EVENTFD_ASYNC)
+                       ctx->eventfd_async = 1;
+               else
+                       ctx->eventfd_async = 0;
                break;
        case IORING_UNREGISTER_EVENTFD:
                ret = -EINVAL;
index 0fe270ab191c5ddd93b130c3cc27b23a3ce2fcbd..66772a90a7f2dd55a5a77725026554321728c047 100644 (file)
@@ -192,6 +192,7 @@ struct io_uring_params {
 #define IORING_REGISTER_EVENTFD                4
 #define IORING_UNREGISTER_EVENTFD      5
 #define IORING_REGISTER_FILES_UPDATE   6
+#define IORING_REGISTER_EVENTFD_ASYNC  7
 
 struct io_uring_files_update {
        __u32 offset;