From: Alex Markuze Date: Thu, 7 May 2026 08:54:07 +0000 (+0000) Subject: ceph: add manual reset debugfs control and tracepoints X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7e1f9e2cd2d0e780c394a4402c40e125109fec72;p=thirdparty%2Fkernel%2Flinux.git ceph: add manual reset debugfs control and tracepoints Add the debugfs and trace plumbing used to trigger and observe manual client reset. The reset interface exposes a trigger file for operator-initiated reset and a status file for tracking the most recent run. The tracepoints record scheduling, completion, and blocked caller behavior so reset progress can be diagnosed from the client side. debugfs layout under /sys/kernel/debug/ceph/<client>/reset/: trigger - write to initiate a manual reset status - read to see the most recent reset result The reset directory is cleaned up via debugfs_remove_recursive() on the parent, so individual file dentries are not stored. Tracepoints: ceph_client_reset_schedule - reset queued ceph_client_reset_complete - reset finished (success or failure) ceph_client_reset_blocked - caller blocked waiting for reset ceph_client_reset_unblocked - caller unblocked after reset All tracepoints use a null-safe access for monc.auth->global_id to guard against early-init or late-teardown edge cases. 
Signed-off-by: Alex Markuze Reviewed-by: Viacheslav Dubeyko Signed-off-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index e2463f93cf6b5..18eb5da03411b 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -392,6 +393,90 @@ static int status_show(struct seq_file *s, void *p) return 0; } +static int reset_status_show(struct seq_file *s, void *p) +{ + struct ceph_fs_client *fsc = s->private; + struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_client_reset_state *st; + u64 trigger = 0, success = 0, failure = 0; + unsigned long last_start = 0, last_finish = 0; + int last_errno = 0; + enum ceph_client_reset_phase phase = CEPH_CLIENT_RESET_IDLE; + bool drain_timed_out = false; + int sessions_reset = 0; + int blocked_requests = 0; + char reason[CEPH_CLIENT_RESET_REASON_LEN]; + + if (!mdsc) + return 0; + + st = &mdsc->reset_state; + + spin_lock(&st->lock); + trigger = st->trigger_count; + success = st->success_count; + failure = st->failure_count; + last_start = st->last_start; + last_finish = st->last_finish; + last_errno = st->last_errno; + phase = st->phase; + drain_timed_out = st->drain_timed_out; + sessions_reset = st->sessions_reset; + strscpy(reason, st->last_reason, sizeof(reason)); + spin_unlock(&st->lock); + + blocked_requests = atomic_read(&st->blocked_requests); + + seq_printf(s, "phase: %s\n", ceph_reset_phase_name(phase)); + seq_printf(s, "trigger_count: %llu\n", trigger); + seq_printf(s, "success_count: %llu\n", success); + seq_printf(s, "failure_count: %llu\n", failure); + if (last_start) + seq_printf(s, "last_start_ms_ago: %u\n", + jiffies_to_msecs(jiffies - last_start)); + else + seq_puts(s, "last_start_ms_ago: (never)\n"); + if (last_finish) + seq_printf(s, "last_finish_ms_ago: %u\n", + jiffies_to_msecs(jiffies - last_finish)); + else + seq_puts(s, "last_finish_ms_ago: (never)\n"); + seq_printf(s, "last_errno: 
%d\n", last_errno); + seq_printf(s, "last_reason: %s\n", + reason[0] ? reason : "(none)"); + seq_printf(s, "drain_timed_out: %s\n", + drain_timed_out ? "yes" : "no"); + seq_printf(s, "sessions_reset: %d\n", sessions_reset); + seq_printf(s, "blocked_requests: %d\n", blocked_requests); + + return 0; +} + +static ssize_t reset_trigger_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct ceph_fs_client *fsc = file->private_data; + struct ceph_mds_client *mdsc = fsc->mdsc; + char reason[CEPH_CLIENT_RESET_REASON_LEN]; + size_t copy; + int ret; + + if (!mdsc) + return -ENODEV; + + copy = min_t(size_t, len, sizeof(reason) - 1); + if (copy && copy_from_user(reason, buf, copy)) + return -EFAULT; + reason[copy] = '\0'; + strim(reason); + + ret = ceph_mdsc_schedule_reset(mdsc, reason); + if (ret) + return ret; + + return len; +} + static int subvolume_metrics_show(struct seq_file *s, void *p) { struct ceph_fs_client *fsc = s->private; @@ -450,6 +535,7 @@ DEFINE_SHOW_ATTRIBUTE(mdsc); DEFINE_SHOW_ATTRIBUTE(caps); DEFINE_SHOW_ATTRIBUTE(mds_sessions); DEFINE_SHOW_ATTRIBUTE(status); +DEFINE_SHOW_ATTRIBUTE(reset_status); DEFINE_SHOW_ATTRIBUTE(metrics_file); DEFINE_SHOW_ATTRIBUTE(metrics_latency); DEFINE_SHOW_ATTRIBUTE(metrics_size); @@ -521,6 +607,13 @@ static int metric_features_show(struct seq_file *s, void *p) DEFINE_SHOW_ATTRIBUTE(metric_features); +static const struct file_operations ceph_reset_trigger_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = reset_trigger_write, + .llseek = noop_llseek, +}; + /* * debugfs */ @@ -554,6 +647,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) debugfs_remove(fsc->debugfs_caps); debugfs_remove(fsc->debugfs_status); debugfs_remove(fsc->debugfs_mdsc); + debugfs_remove_recursive(fsc->debugfs_reset_dir); debugfs_remove(fsc->debugfs_subvolume_metrics); debugfs_remove_recursive(fsc->debugfs_metrics_dir); doutc(fsc->client, "done\n"); @@ -602,6 +696,15 @@ void 
ceph_fs_debugfs_init(struct ceph_fs_client *fsc) fsc, &caps_fops); + fsc->debugfs_reset_dir = debugfs_create_dir("reset", + fsc->client->debugfs_dir); + debugfs_create_file("trigger", 0200, + fsc->debugfs_reset_dir, fsc, + &ceph_reset_trigger_fops); + debugfs_create_file("status", 0400, + fsc->debugfs_reset_dir, fsc, + &reset_status_fops); + fsc->debugfs_status = debugfs_create_file("status", 0400, fsc->client->debugfs_dir, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ddafbd6917b02..9f84ef2ac6e45 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5325,6 +5325,7 @@ int ceph_mdsc_wait_for_reset(struct ceph_mds_client *mdsc) blocked_count = atomic_inc_return(&st->blocked_requests); doutc(cl, "request blocked during reset, %d total blocked\n", blocked_count); + trace_ceph_client_reset_blocked(mdsc, blocked_count); retry: remaining = max_t(long, deadline - jiffies, 1); @@ -5335,10 +5336,12 @@ retry: if (wait_ret == 0) { atomic_dec(&st->blocked_requests); pr_warn_client(cl, "timed out waiting for reset to complete\n"); + trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT); return -ETIMEDOUT; } if (wait_ret < 0) { atomic_dec(&st->blocked_requests); + trace_ceph_client_reset_unblocked(mdsc, (int)wait_ret); return (int)wait_ret; /* -ERESTARTSYS */ } @@ -5353,12 +5356,14 @@ retry: if (time_before(jiffies, deadline)) goto retry; atomic_dec(&st->blocked_requests); + trace_ceph_client_reset_unblocked(mdsc, -ETIMEDOUT); return -ETIMEDOUT; } ret = st->last_errno; spin_unlock(&st->lock); atomic_dec(&st->blocked_requests); + trace_ceph_client_reset_unblocked(mdsc, ret); return ret ? 
-EAGAIN : 0; } @@ -5388,6 +5393,7 @@ static void ceph_mdsc_reset_complete(struct ceph_mds_client *mdsc, int ret) /* Wake up all requests that were blocked waiting for reset */ wake_up_all(&st->blocked_wq); + trace_ceph_client_reset_complete(mdsc, ret); } static void ceph_mdsc_reset_workfn(struct work_struct *work) @@ -5750,6 +5756,7 @@ int ceph_mdsc_schedule_reset(struct ceph_mds_client *mdsc, pr_info_client(mdsc->fsc->client, "manual session reset scheduled (reason=\"%s\")\n", msg); + trace_ceph_client_reset_schedule(mdsc, msg); return 0; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a4993644d543d..1d6aab060780a 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -179,6 +179,7 @@ struct ceph_fs_client { struct dentry *debugfs_status; struct dentry *debugfs_mds_sessions; struct dentry *debugfs_metrics_dir; + struct dentry *debugfs_reset_dir; struct dentry *debugfs_subvolume_metrics; #endif diff --git a/include/trace/events/ceph.h b/include/trace/events/ceph.h index 08cb0659fbfc7..1b990632f62ba 100644 --- a/include/trace/events/ceph.h +++ b/include/trace/events/ceph.h @@ -226,6 +226,73 @@ TRACE_EVENT(ceph_handle_caps, __entry->mseq) ); +/* + * Client reset tracepoints - identify the client by its monitor- + * assigned global_id so traces remain meaningful when kernel pointer + * hashing is enabled. + */ +TRACE_EVENT(ceph_client_reset_schedule, + TP_PROTO(const struct ceph_mds_client *mdsc, const char *reason), + TP_ARGS(mdsc, reason), + TP_STRUCT__entry( + __field(u64, client_id) + __string(reason, reason ? reason : "") + ), + TP_fast_assign( + __entry->client_id = mdsc->fsc->client->monc.auth ? 
+ mdsc->fsc->client->monc.auth->global_id : 0; + __assign_str(reason); + ), + TP_printk("client_id=%llu reason=%s", + __entry->client_id, __get_str(reason)) +); + +TRACE_EVENT(ceph_client_reset_complete, + TP_PROTO(const struct ceph_mds_client *mdsc, int ret), + TP_ARGS(mdsc, ret), + TP_STRUCT__entry( + __field(u64, client_id) + __field(int, ret) + ), + TP_fast_assign( + __entry->client_id = mdsc->fsc->client->monc.auth ? + mdsc->fsc->client->monc.auth->global_id : 0; + __entry->ret = ret; + ), + TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret) +); + +TRACE_EVENT(ceph_client_reset_blocked, + TP_PROTO(const struct ceph_mds_client *mdsc, int blocked_count), + TP_ARGS(mdsc, blocked_count), + TP_STRUCT__entry( + __field(u64, client_id) + __field(int, blocked_count) + ), + TP_fast_assign( + __entry->client_id = mdsc->fsc->client->monc.auth ? + mdsc->fsc->client->monc.auth->global_id : 0; + __entry->blocked_count = blocked_count; + ), + TP_printk("client_id=%llu blocked_count=%d", __entry->client_id, + __entry->blocked_count) +); + +TRACE_EVENT(ceph_client_reset_unblocked, + TP_PROTO(const struct ceph_mds_client *mdsc, int ret), + TP_ARGS(mdsc, ret), + TP_STRUCT__entry( + __field(u64, client_id) + __field(int, ret) + ), + TP_fast_assign( + __entry->client_id = mdsc->fsc->client->monc.auth ? + mdsc->fsc->client->monc.auth->global_id : 0; + __entry->ret = ret; + ), + TP_printk("client_id=%llu ret=%d", __entry->client_id, __entry->ret) +); + #undef EM #undef E_ #endif /* _TRACE_CEPH_H */