git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
io_uring/napi: add static napi tracking strategy
author Olivier Langlois <olivier@trillion01.com>
Sun, 13 Oct 2024 18:29:24 +0000 (14:29 -0400)
committer Jens Axboe <axboe@kernel.dk>
Wed, 6 Nov 2024 20:55:38 +0000 (13:55 -0700)
Add the static napi tracking strategy. That allows the user to manually
manage the napi ids list for busy polling, and eliminate the overhead of
dynamically updating the list from the fast path.

Signed-off-by: Olivier Langlois <olivier@trillion01.com>
Link: https://lore.kernel.org/r/96943de14968c35a5c599352259ad98f3c0770ba.1728828877.git.olivier@trillion01.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/linux/io_uring_types.h
include/uapi/linux/io_uring.h
io_uring/fdinfo.c
io_uring/napi.c
io_uring/napi.h

index fba2988accc3a9715e763e62be783a9206addc4c..072e65e931053336f0fcc99e4b4723babcd33fdb 100644 (file)
@@ -408,7 +408,7 @@ struct io_ring_ctx {
        /* napi busy poll default timeout */
        ktime_t                 napi_busy_poll_dt;
        bool                    napi_prefer_busy_poll;
-       bool                    napi_enabled;
+       u8                      napi_track_mode;
 
        DECLARE_HASHTABLE(napi_ht, 4);
 #endif
index 47977a5c65f5c9a84f5a2c233028af78bf548187..5d08435b95a8aaab317edb248c882f7384b5b254 100644 (file)
@@ -790,12 +790,40 @@ struct io_uring_buf_status {
        __u32   resv[8];
 };
 
+enum io_uring_napi_op {
+       /* register/unregister backward compatible opcode */
+       IO_URING_NAPI_REGISTER_OP = 0,
+
+       /* opcodes to update napi_list when static tracking is used */
+       IO_URING_NAPI_STATIC_ADD_ID = 1,
+       IO_URING_NAPI_STATIC_DEL_ID = 2
+};
+
+enum io_uring_napi_tracking_strategy {
+       /* value must be 0 for backward compatibility */
+       IO_URING_NAPI_TRACKING_DYNAMIC = 0,
+       IO_URING_NAPI_TRACKING_STATIC = 1,
+       IO_URING_NAPI_TRACKING_INACTIVE = 255
+};
+
 /* argument for IORING_(UN)REGISTER_NAPI */
 struct io_uring_napi {
        __u32   busy_poll_to;
        __u8    prefer_busy_poll;
-       __u8    pad[3];
-       __u64   resv;
+
+       /* an io_uring_napi_op value */
+       __u8    opcode;
+       __u8    pad[2];
+
+       /*
+        * for IO_URING_NAPI_REGISTER_OP, it is an
+        * io_uring_napi_tracking_strategy value.
+        *
+        * for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID
+        * it is the napi id to add/del from napi_list.
+        */
+       __u32   op_param;
+       __u32   resv;
 };
 
 /*
index efbec34ccb18d7543c8d87e49d233c85bb186ef2..b214e5a407b565d32532f7c3bf8917034a1fe8aa 100644 (file)
@@ -46,6 +46,46 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
        return 0;
 }
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+                                              struct seq_file *m,
+                                              const char *tracking_strategy)
+{
+       seq_puts(m, "NAPI:\tenabled\n");
+       seq_printf(m, "napi tracking:\t%s\n", tracking_strategy);
+       seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt);
+       if (ctx->napi_prefer_busy_poll)
+               seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
+       else
+               seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
+}
+
+static __cold void napi_show_fdinfo(struct io_ring_ctx *ctx,
+                                   struct seq_file *m)
+{
+       unsigned int mode = READ_ONCE(ctx->napi_track_mode);
+
+       switch (mode) {
+       case IO_URING_NAPI_TRACKING_INACTIVE:
+               seq_puts(m, "NAPI:\tdisabled\n");
+               break;
+       case IO_URING_NAPI_TRACKING_DYNAMIC:
+               common_tracking_show_fdinfo(ctx, m, "dynamic");
+               break;
+       case IO_URING_NAPI_TRACKING_STATIC:
+               common_tracking_show_fdinfo(ctx, m, "static");
+               break;
+       default:
+               seq_printf(m, "NAPI:\tunknown mode (%u)\n", mode);
+       }
+}
+#else
+static inline void napi_show_fdinfo(struct io_ring_ctx *ctx,
+                                   struct seq_file *m)
+{
+}
+#endif
+
 /*
  * Caller holds a reference to the file already, we don't need to do
  * anything else to get an extra reference.
@@ -219,18 +259,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 
        }
        spin_unlock(&ctx->completion_lock);
-
-#ifdef CONFIG_NET_RX_BUSY_POLL
-       if (ctx->napi_enabled) {
-               seq_puts(m, "NAPI:\tenabled\n");
-               seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt);
-               if (ctx->napi_prefer_busy_poll)
-                       seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
-               else
-                       seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
-       } else {
-               seq_puts(m, "NAPI:\tdisabled\n");
-       }
-#endif
+       napi_show_fdinfo(ctx, m);
 }
 #endif
index 1de1543d803440a6f3077e21a25141b3cb433374..b1ade3fda30f3e97110503cfe7b19d8fee5d22c0 100644 (file)
@@ -81,6 +81,27 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
        return 0;
 }
 
+static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
+{
+       struct hlist_head *hash_list;
+       struct io_napi_entry *e;
+
+       /* Non-NAPI IDs can be rejected. */
+       if (napi_id < MIN_NAPI_ID)
+               return -EINVAL;
+
+       hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
+       guard(spinlock)(&ctx->napi_lock);
+       e = io_napi_hash_find(hash_list, napi_id);
+       if (!e)
+               return -ENOENT;
+
+       list_del_rcu(&e->list);
+       hash_del_rcu(&e->node);
+       kfree_rcu(e, rcu);
+       return 0;
+}
+
 static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
 {
        struct io_napi_entry *e;
@@ -136,9 +157,25 @@ static bool io_napi_busy_loop_should_end(void *data,
        return false;
 }
 
-static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
-                                  bool (*loop_end)(void *, unsigned long),
-                                  void *loop_end_arg)
+/*
+ * never report stale entries
+ */
+static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+                                        bool (*loop_end)(void *, unsigned long),
+                                        void *loop_end_arg)
+{
+       struct io_napi_entry *e;
+
+       list_for_each_entry_rcu(e, &ctx->napi_list, list)
+               napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
+                                  ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
+       return false;
+}
+
+static bool
+dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+                             bool (*loop_end)(void *, unsigned long),
+                             void *loop_end_arg)
 {
        struct io_napi_entry *e;
        bool is_stale = false;
@@ -154,6 +191,16 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
        return is_stale;
 }
 
+static inline bool
+__io_napi_do_busy_loop(struct io_ring_ctx *ctx,
+                      bool (*loop_end)(void *, unsigned long),
+                      void *loop_end_arg)
+{
+       if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
+               return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
+       return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
+}
+
 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
                                       struct io_wait_queue *iowq)
 {
@@ -195,6 +242,7 @@ void io_napi_init(struct io_ring_ctx *ctx)
        spin_lock_init(&ctx->napi_lock);
        ctx->napi_prefer_busy_poll = false;
        ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
+       ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE;
 }
 
 /*
@@ -215,6 +263,24 @@ void io_napi_free(struct io_ring_ctx *ctx)
        INIT_LIST_HEAD_RCU(&ctx->napi_list);
 }
 
+static int io_napi_register_napi(struct io_ring_ctx *ctx,
+                                struct io_uring_napi *napi)
+{
+       switch (napi->op_param) {
+       case IO_URING_NAPI_TRACKING_DYNAMIC:
+       case IO_URING_NAPI_TRACKING_STATIC:
+               break;
+       default:
+               return -EINVAL;
+       }
+       /* clean the napi list for new settings */
+       io_napi_free(ctx);
+       WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
+       WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
+       WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
+       return 0;
+}
+
 /*
  * io_napi_register() - Register napi with io-uring
  * @ctx: pointer to io-uring context structure
@@ -226,7 +292,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
 {
        const struct io_uring_napi curr = {
                .busy_poll_to     = ktime_to_us(ctx->napi_busy_poll_dt),
-               .prefer_busy_poll = ctx->napi_prefer_busy_poll
+               .prefer_busy_poll = ctx->napi_prefer_busy_poll,
+               .op_param         = ctx->napi_track_mode
        };
        struct io_uring_napi napi;
 
@@ -234,16 +301,26 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
                return -EINVAL;
        if (copy_from_user(&napi, arg, sizeof(napi)))
                return -EFAULT;
-       if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
+       if (napi.pad[0] || napi.pad[1] || napi.resv)
                return -EINVAL;
 
        if (copy_to_user(arg, &curr, sizeof(curr)))
                return -EFAULT;
 
-       WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
-       WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
-       WRITE_ONCE(ctx->napi_enabled, true);
-       return 0;
+       switch (napi.opcode) {
+       case IO_URING_NAPI_REGISTER_OP:
+               return io_napi_register_napi(ctx, &napi);
+       case IO_URING_NAPI_STATIC_ADD_ID:
+               if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
+                       return -EINVAL;
+               return __io_napi_add_id(ctx, napi.op_param);
+       case IO_URING_NAPI_STATIC_DEL_ID:
+               if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
+                       return -EINVAL;
+               return __io_napi_del_id(ctx, napi.op_param);
+       default:
+               return -EINVAL;
+       }
 }
 
 /*
@@ -266,7 +343,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 
        WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
        WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
-       WRITE_ONCE(ctx->napi_enabled, false);
+       WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
        return 0;
 }
 
index 4ae622f37b302b859102cee303c30aca19aa34d7..fa742f42e09b472356ca549a146912506c98d0c9 100644 (file)
@@ -44,7 +44,7 @@ static inline void io_napi_add(struct io_kiocb *req)
        struct io_ring_ctx *ctx = req->ctx;
        struct socket *sock;
 
-       if (!READ_ONCE(ctx->napi_enabled))
+       if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC)
                return;
 
        sock = sock_from_file(req->file);