From: Alexander Aring Date: Wed, 23 Jul 2025 15:21:56 +0000 (-0400) Subject: dlm: add new flag DLM_RELEASE_RECOVER for dlm_lockspace_release X-Git-Tag: v6.18-rc1~222^2~4 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6f4f4ca5caf73de5e86329547d4527b3e0c08488;p=thirdparty%2Flinux.git dlm: add new flag DLM_RELEASE_RECOVER for dlm_lockspace_release When dlm_release_lockspace() is passed DLM_RELEASE_RECOVER, it tells the dlm to handle the release/leave as if the node had failed, i.e. perform recovery steps for a failed node, like recover_slot(). When DLM_RELEASE_RECOVER is set: - dlm_release_lockspace() includes RELEASE_RECOVER=1 in the OFFLINE uevent sent to userspace. - userspace/dlm_controld sends a message to all lockspace members indicating that the subsequent node removal should be handled as if the node had failed. - when dlm_controld on all nodes receives the new message, it sets the release_recover configfs entry to 1 for the node. - when the dlm/kernel next performs recovery and removes the node, it will see that release_recover has been set, and will perform recovery steps for the node as if it had failed, e.g. the recover_slot() callback is called to notify the fs. 
Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 6ff666a511c7a..d986b7ef153dc 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -738,7 +738,7 @@ static int release_lockspace(struct dlm_ls *ls, int release_option) if (release_option != DLM_RELEASE_NO_EVENT && dlm_user_daemon_available()) - do_uevent(ls, 0, 0); + do_uevent(ls, 0, (release_option == DLM_RELEASE_RECOVER)); dlm_recoverd_stop(ls); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 152d2cb16f591..356337102015e 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -478,7 +478,8 @@ static void dlm_lsop_recover_prep(struct dlm_ls *ls) ls->ls_ops->recover_prep(ls->ls_ops_arg); } -static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) +static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb, + unsigned int release_recover) { struct dlm_slot slot; uint32_t seq; @@ -495,7 +496,7 @@ static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb) error = dlm_comm_seq(memb->nodeid, &seq, false); - if (!error && seq == memb->comm_seq) + if (!release_recover && !error && seq == memb->comm_seq) return; slot.nodeid = memb->nodeid; @@ -552,6 +553,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) struct dlm_member *memb, *safe; struct dlm_config_node *node; int i, error, neg = 0, low = -1; + unsigned int release_recover; /* previously removed members that we've not finished removing need to * count as a negative change so the "neg" recovery steps will happen @@ -572,8 +574,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) if (node && !node->new && !node->gone) continue; + release_recover = 0; + if (node->gone) { - log_rinfo(ls, "remove member %d", memb->nodeid); + release_recover = node->release_recover; + log_rinfo(ls, "remove member %d%s", memb->nodeid, + release_recover ? 
" (release_recover)" : ""); } else { /* removed and re-added */ log_rinfo(ls, "remove member %d comm_seq %u %u", @@ -584,7 +590,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) list_move(&memb->list, &ls->ls_nodes_gone); remove_remote_member(memb->nodeid); ls->ls_num_nodes--; - dlm_lsop_recover_slot(ls, memb); + dlm_lsop_recover_slot(ls, memb, release_recover); } /* add new members to ls_nodes */ diff --git a/include/linux/dlm.h b/include/linux/dlm.h index cc7a36244893d..108eb953eb181 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -103,11 +103,16 @@ int dlm_new_lockspace(const char *name, const char *cluster, * a leave event to the cluster manager, so other nodes will * not be notified that the node should be removed from the * list of lockspace members. + * + * DLM_RELEASE_RECOVER like DLM_RELEASE_NORMAL, but the remaining + * nodes will handle the removal of the node as if the node + * had failed, e.g. the recover_slot() callback would be used. */ #define DLM_RELEASE_NO_LOCKS 0 #define DLM_RELEASE_UNUSED 1 #define DLM_RELEASE_NORMAL 2 #define DLM_RELEASE_NO_EVENT 3 +#define DLM_RELEASE_RECOVER 4 /* * dlm_release_lockspace