git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
ceph: add manual reset debugfs control and tracepoints
author: Alex Markuze <amarkuze@redhat.com>
Thu, 7 May 2026 08:54:07 +0000 (08:54 +0000)
committer: Ilya Dryomov <idryomov@gmail.com>
Mon, 22 Jun 2026 20:45:05 +0000 (22:45 +0200)
Add the debugfs and trace plumbing used to trigger and observe
manual client reset.

The reset interface exposes a trigger file for operator-initiated
reset and a status file for tracking the most recent run.  The
tracepoints record scheduling, completion, and blocked caller
behavior so reset progress can be diagnosed from the client side.

debugfs layout under /sys/kernel/debug/ceph/<client>/reset/:
  trigger - write to initiate a manual reset
  status  - read to see the most recent reset result

The reset directory is cleaned up via debugfs_remove_recursive()
on the parent, so individual file dentries are not stored.

Tracepoints:
  ceph_client_reset_schedule  - reset queued
  ceph_client_reset_complete  - reset finished (success or failure)
  ceph_client_reset_blocked   - caller blocked waiting for reset
  ceph_client_reset_unblocked - caller unblocked after reset

All tracepoints use a null-safe access for monc.auth->global_id
to guard against early-init or late-teardown edge cases.

Signed-off-by: Alex Markuze <amarkuze@redhat.com>
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/debugfs.c
fs/ceph/mds_client.c
fs/ceph/super.h
include/trace/events/ceph.h

index e2463f93cf6b5c90ae5e255c1d034f328fa37515..18eb5da03411b2ac13d6940ebbb17f437b90c7d4 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/seq_file.h>
 #include <linux/math64.h>
 #include <linux/ktime.h>
+#include <linux/uaccess.h>
 #include <linux/atomic.h>
 
 #include <linux/ceph/libceph.h>
@@ -392,6 +393,90 @@ static int status_show(struct seq_file *s, void *p)
        return 0;
 }
 
+/*
+ * seq_file show callback for the reset "status" debugfs file.
+ *
+ * Prints the current reset phase, the trigger/success/failure
+ * counters, how long ago the last run started and finished, the last
+ * errno and reason string, whether the drain timed out, the number of
+ * sessions reset, and the number of currently blocked requests.
+ *
+ * Prints nothing when fsc->mdsc is NULL.  Always returns 0.
+ */
+static int reset_status_show(struct seq_file *s, void *p)
+{
+       struct ceph_fs_client *fsc = s->private;
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+       struct ceph_client_reset_state *st;
+       char why[CEPH_CLIENT_RESET_REASON_LEN];
+       enum ceph_client_reset_phase cur_phase;
+       unsigned long started, finished;
+       u64 n_trigger, n_success, n_failure;
+       int err, n_sessions, n_blocked;
+       bool timed_out;
+
+       if (!mdsc)
+               return 0;
+
+       st = &mdsc->reset_state;
+
+       /* Copy everything out under st->lock; format after unlocking. */
+       spin_lock(&st->lock);
+       cur_phase = st->phase;
+       n_trigger = st->trigger_count;
+       n_success = st->success_count;
+       n_failure = st->failure_count;
+       started = st->last_start;
+       finished = st->last_finish;
+       err = st->last_errno;
+       strscpy(why, st->last_reason, sizeof(why));
+       timed_out = st->drain_timed_out;
+       n_sessions = st->sessions_reset;
+       spin_unlock(&st->lock);
+
+       /* blocked_requests is an atomic counter, read outside st->lock. */
+       n_blocked = atomic_read(&st->blocked_requests);
+
+       seq_printf(s, "phase: %s\n", ceph_reset_phase_name(cur_phase));
+       seq_printf(s, "trigger_count: %llu\n", n_trigger);
+       seq_printf(s, "success_count: %llu\n", n_success);
+       seq_printf(s, "failure_count: %llu\n", n_failure);
+
+       /* A zero timestamp is reported as "(never)". */
+       if (!started)
+               seq_puts(s, "last_start_ms_ago: (never)\n");
+       else
+               seq_printf(s, "last_start_ms_ago: %u\n",
+                          jiffies_to_msecs(jiffies - started));
+
+       if (!finished)
+               seq_puts(s, "last_finish_ms_ago: (never)\n");
+       else
+               seq_printf(s, "last_finish_ms_ago: %u\n",
+                          jiffies_to_msecs(jiffies - finished));
+
+       seq_printf(s, "last_errno: %d\n", err);
+       seq_printf(s, "last_reason: %s\n",
+                  why[0] != '\0' ? why : "(none)");
+       seq_printf(s, "drain_timed_out: %s\n",
+                  timed_out ? "yes" : "no");
+       seq_printf(s, "sessions_reset: %d\n", n_sessions);
+       seq_printf(s, "blocked_requests: %d\n", n_blocked);
+
+       return 0;
+}
+
+/*
+ * Write handler for the reset "trigger" debugfs file: schedule a
+ * manual client reset.
+ *
+ * The written bytes are used as a free-form reason string.  At most
+ * CEPH_CLIENT_RESET_REASON_LEN - 1 bytes are copied from userspace;
+ * anything beyond that is dropped.  Leading and trailing whitespace
+ * (e.g. a trailing newline) is stripped before the reason is passed
+ * to ceph_mdsc_schedule_reset().
+ *
+ * Returns @len on success so the whole write is consumed, -ENODEV if
+ * fsc->mdsc is NULL, -EFAULT if the user buffer cannot be read, or
+ * the error returned by ceph_mdsc_schedule_reset().
+ */
+static ssize_t reset_trigger_write(struct file *file, const char __user *buf,
+                                  size_t len, loff_t *ppos)
+{
+       struct ceph_fs_client *fsc = file->private_data;
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+       char reason[CEPH_CLIENT_RESET_REASON_LEN];
+       size_t copy;
+       int ret;
+
+       if (!mdsc)
+               return -ENODEV;
+
+       /* Leave room for the terminating NUL; longer input is truncated. */
+       copy = min_t(size_t, len, sizeof(reason) - 1);
+       if (copy && copy_from_user(reason, buf, copy))
+               return -EFAULT;
+       reason[copy] = '\0';
+
+       /*
+        * strim() cuts trailing whitespace in place but reports the
+        * start of the text only through its __must_check return
+        * value.  Use that pointer so leading whitespace is dropped
+        * as well instead of silently discarding the result.
+        */
+       ret = ceph_mdsc_schedule_reset(mdsc, strim(reason));
+       if (ret)
+               return ret;
+
+       return len;
+}
+
 static int subvolume_metrics_show(struct seq_file *s, void *p)
 {
        struct ceph_fs_client *fsc = s->private;
@@ -450,6 +535,7 @@ DEFINE_SHOW_ATTRIBUTE(mdsc);
 DEFINE_SHOW_ATTRIBUTE(caps);
 DEFINE_SHOW_ATTRIBUTE(mds_sessions);
 DEFINE_SHOW_ATTRIBUTE(status);
+DEFINE_SHOW_ATTRIBUTE(reset_status);
 DEFINE_SHOW_ATTRIBUTE(metrics_file);
 DEFINE_SHOW_ATTRIBUTE(metrics_latency);
 DEFINE_SHOW_ATTRIBUTE(metrics_size);
@@ -521,6 +607,13 @@ static int metric_features_show(struct seq_file *s, void *p)
 
 DEFINE_SHOW_ATTRIBUTE(metric_features);
 
+/*
+ * File operations for the reset "trigger" debugfs file.  Only .write
+ * does real work; reset_trigger_write() picks up the ceph_fs_client
+ * from file->private_data.
+ */
+static const struct file_operations ceph_reset_trigger_fops = {
+       .owner  = THIS_MODULE,
+       .llseek = noop_llseek,
+       .open   = simple_open,
+       .write  = reset_trigger_write,
+};
+
 /*
  * debugfs
  */
@@ -554,6 +647,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
        debugfs_remove(fsc->debugfs_caps);
        debugfs_remove(fsc->debugfs_status);
        debugfs_remove(fsc->debugfs_mdsc);
+       debugfs_remove_recursive(fsc->debugfs_reset_dir);
        debugfs_remove(fsc->debugfs_subvolume_metrics);
        debugfs_remove_recursive(fsc->debugfs_metrics_dir);
        doutc(fsc->client, "done\n");
@@ -602,6 +696,15 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                                                fsc,
                                                &caps_fops);
 
+       fsc->debugfs_reset_dir = debugfs_create_dir("reset",
+                                                   fsc->client->debugfs_dir);
+       debugfs_create_file("trigger", 0200,
+                           fsc->debugfs_reset_dir, fsc,
+                           &ceph_reset_trigger_fops);
+       debugfs_create_file("status", 0400,
+                           fsc->debugfs_reset_dir, fsc,
+                           &reset_status_fops);
+
        fsc->debugfs_status = debugfs_create_file("status",
                                                  0400,
                                                  fsc->client->debugfs_dir,
index ddafbd6917b023297687818bcd4a4abec68d83e5..9f84ef2ac6e454f986428c1da7a02a229269f223 100644 (file)
@@ -5325,6 +5325,7 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc)
        blocked_count = atomic_inc_return(&st->blocked_requests);
        doutc(cl, "request blocked during reset, %d total blocked\n",
              blocked_count);
+       trace_ceph_client_reset_blocked(mdsc, blocked_count);
 
 retry:
        remaining = max_t(long, deadline - jiffies, 1);
@@ -5335,10 +5336,12 @@ retry:
        if (wait_ret == 0) {
                atomic_dec(&st->blocked_requests);
                pr_warn_client(cl, "timed out waiting for reset to complete\n");
+               trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
                return -ETIMEDOUT;
        }
        if (wait_ret < 0) {
                atomic_dec(&st->blocked_requests);
+               trace_ceph_client_reset_unblocked(mdsc, (int)wait_ret);
                return (int)wait_ret;  /* -ERESTARTSYS */
        }
 
@@ -5353,12 +5356,14 @@ retry:
                if (time_before(jiffies, deadline))
                        goto retry;
                atomic_dec(&st->blocked_requests);
+               trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT);
                return -ETIMEDOUT;
        }
        ret = st->last_errno;
        spin_unlock(&st->lock);
 
        atomic_dec(&st->blocked_requests);
+       trace_ceph_client_reset_unblocked(mdsc, ret);
        return ret ? -EAGAIN : 0;
 }
 
@@ -5388,6 +5393,7 @@ static void ceph_mdsc_reset_complete(struct ceph_mds_client *mdsc, int ret)
        /* Wake up all requests that were blocked waiting for reset */
        wake_up_all(&st->blocked_wq);
 
+       trace_ceph_client_reset_complete(mdsc, ret);
 }
 
 static void ceph_mdsc_reset_workfn(struct work_struct *work)
@@ -5750,6 +5756,7 @@ int ceph_mdsc_schedule_reset(struct ceph_mds_client *mdsc,
        pr_info_client(mdsc->fsc->client,
                       "manual session reset scheduled (reason=\"%s\")\n",
                       msg);
+       trace_ceph_client_reset_schedule(mdsc, msg);
        return 0;
 }
 
index a4993644d543de6c0c7e6ee84860a29574c12c95..1d6aab060780aae8339313ea2d605a6cf500cf40 100644 (file)
@@ -179,6 +179,7 @@ struct ceph_fs_client {
        struct dentry *debugfs_status;
        struct dentry *debugfs_mds_sessions;
        struct dentry *debugfs_metrics_dir;
+       struct dentry *debugfs_reset_dir;
        struct dentry *debugfs_subvolume_metrics;
 #endif
 
index 08cb0659fbfc78585566eb913851cc939c458de5..1b990632f62baa54b9da2d1e8a43d2a68e22bb1f 100644 (file)
@@ -226,6 +226,73 @@ TRACE_EVENT(ceph_handle_caps,
                  __entry->mseq)
 );
 
+/*
+ * Client reset tracepoints.
+ *
+ * Each event tags the client with the global_id taken from
+ * monc.auth rather than with a pointer, so the traces stay useful
+ * when kernel pointer hashing is enabled.  client_id is recorded as
+ * 0 when monc.auth is NULL.
+ */
+
+/* A manual reset was queued; @reason may be NULL and is then logged as "". */
+TRACE_EVENT(ceph_client_reset_schedule,
+       TP_PROTO(const struct ceph_mds_client *mdsc, const char *reason),
+       TP_ARGS(mdsc, reason),
+       TP_STRUCT__entry(
+               __field(u64, client_id)
+               __string(reason, reason ? reason : "")
+       ),
+       TP_fast_assign(
+               if (mdsc->fsc->client->monc.auth)
+                       __entry->client_id =
+                               mdsc->fsc->client->monc.auth->global_id;
+               else
+                       __entry->client_id = 0;
+               __assign_str(reason);
+       ),
+       TP_printk("client_id=%llu reason=%s",
+                 __entry->client_id, __get_str(reason))
+);
+
+/*
+ * A reset run finished; @ret is the result handed to
+ * ceph_mdsc_reset_complete().  client_id is 0 when monc.auth is NULL.
+ */
+TRACE_EVENT(ceph_client_reset_complete,
+       TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+       TP_ARGS(mdsc, ret),
+       TP_STRUCT__entry(
+               __field(u64, client_id)
+               __field(int, ret)
+       ),
+       TP_fast_assign(
+               if (mdsc->fsc->client->monc.auth)
+                       __entry->client_id =
+                               mdsc->fsc->client->monc.auth->global_id;
+               else
+                       __entry->client_id = 0;
+               __entry->ret = ret;
+       ),
+       TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
+);
+
+/*
+ * A caller started waiting for an in-progress reset; @blocked_count
+ * is the blocked-request counter value supplied by the caller.
+ * client_id is 0 when monc.auth is NULL.
+ */
+TRACE_EVENT(ceph_client_reset_blocked,
+       TP_PROTO(const struct ceph_mds_client *mdsc, int blocked_count),
+       TP_ARGS(mdsc, blocked_count),
+       TP_STRUCT__entry(
+               __field(u64, client_id)
+               __field(int, blocked_count)
+       ),
+       TP_fast_assign(
+               if (mdsc->fsc->client->monc.auth)
+                       __entry->client_id =
+                               mdsc->fsc->client->monc.auth->global_id;
+               else
+                       __entry->client_id = 0;
+               __entry->blocked_count = blocked_count;
+       ),
+       TP_printk("client_id=%llu blocked_count=%d", __entry->client_id,
+                 __entry->blocked_count)
+);
+
+/*
+ * A caller stopped waiting for a reset; @ret is the status the
+ * caller passes in (a timeout or wait error, or the recorded reset
+ * errno).  client_id is 0 when monc.auth is NULL.
+ */
+TRACE_EVENT(ceph_client_reset_unblocked,
+       TP_PROTO(const struct ceph_mds_client *mdsc, int ret),
+       TP_ARGS(mdsc, ret),
+       TP_STRUCT__entry(
+               __field(u64, client_id)
+               __field(int, ret)
+       ),
+       TP_fast_assign(
+               if (mdsc->fsc->client->monc.auth)
+                       __entry->client_id =
+                               mdsc->fsc->client->monc.auth->global_id;
+               else
+                       __entry->client_id = 0;
+               __entry->ret = ret;
+       ),
+       TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret)
+);
+
 #undef EM
 #undef E_
 #endif /* _TRACE_CEPH_H */