From: David Tardon Date: Tue, 24 Apr 2018 13:19:38 +0000 (+0200) Subject: fix race between daemon-reload and other commands X-Git-Tag: v239~234 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a7a7163df7fc8a9f794f6803b2f6c9c9b0745a1f;p=thirdparty%2Fsystemd.git fix race between daemon-reload and other commands When "systemctl daemon-reload" is run at the same time as "systemctl start foo", the latter might hang. That's because commands like start wait for JobRemoved signal to know when the job is finished. But if the job is finished during reloading, the signal is never sent. The hang can be easily reproduced by running # for ((N=1; N>0; N++)) ; do echo $N ; systemctl daemon-reload ; done # for ((N=1; N>0; N++)) ; do echo $N ; systemctl start systemd-coredump.socket ; done in two different terminals. The start command will hang after 1-2 iterations. This keeps track of jobs that were started before reload and finished during it and sends JobRemoved after the reload has finished. --- diff --git a/src/core/job.c b/src/core/job.c index 32f6fc1a50f..a0f7685c4e1 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -43,6 +43,7 @@ Job* job_new_raw(Unit *unit) { j->manager = unit->manager; j->unit = unit; j->type = _JOB_TYPE_INVALID; + j->reloaded = false; return j; } @@ -64,7 +65,7 @@ Job* job_new(Unit *unit, JobType type) { return j; } -void job_free(Job *j) { +void job_unlink(Job *j) { assert(j); assert(!j->installed); assert(!j->transaction_prev); @@ -72,16 +73,33 @@ void job_free(Job *j) { assert(!j->subject_list); assert(!j->object_list); - if (j->in_run_queue) + if (j->in_run_queue) { LIST_REMOVE(run_queue, j->manager->run_queue, j); + j->in_run_queue = false; + } - if (j->in_dbus_queue) + if (j->in_dbus_queue) { LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j); + j->in_dbus_queue = false; + } - if (j->in_gc_queue) + if (j->in_gc_queue) { LIST_REMOVE(gc_queue, j->manager->gc_job_queue, j); + j->in_gc_queue = false; + } - sd_event_source_unref(j->timer_event_source); + j->timer_event_source = sd_event_source_unref(j->timer_event_source); +} + +void job_free(Job *j) { + assert(j); + assert(!j->installed); + assert(!j->transaction_prev); + assert(!j->transaction_next); + assert(!j->subject_list); + assert(!j->object_list); + + job_unlink(j); sd_bus_track_unref(j->bus_track); strv_free(j->deserialized_clients); @@ -241,6 +259,7 @@ int job_install_deserialized(Job *j) { *pj = j; j->installed = true; + j->reloaded = true; if (j->state == JOB_RUNNING) j->unit->manager->n_running_jobs++; @@ -844,6 +863,19 @@ static void job_fail_dependencies(Unit *u, UnitDependency d) { } } +static int job_save_pending_finished_job(Job *j) { + int r; + + assert(j); + + r = set_ensure_allocated(&j->manager->pending_finished_jobs, NULL); + if (r < 0) + return r; + + job_unlink(j); + return set_put(j->manager->pending_finished_jobs, j); +} + int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) { Unit *u; Unit *other; @@ -883,7 +915,12 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr j->manager->n_failed_jobs++; job_uninstall(j); - job_free(j); + /* Remember jobs started before the reload */ + if (MANAGER_IS_RELOADING(j->manager) && j->reloaded) { + if (job_save_pending_finished_job(j) < 0) + job_free(j); + } else + job_free(j); /* Fail depending jobs on failure */ if (result != JOB_DONE && recursive) { diff --git a/src/core/job.h b/src/core/job.h index ccb8e1b6741..338670e3935 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -162,10 +162,12 @@ struct Job { bool irreversible:1; bool in_gc_queue:1; bool ref_by_private_bus:1; + bool reloaded:1; }; Job* job_new(Unit *unit, JobType type); Job* job_new_raw(Unit *unit); +void job_unlink(Job *job); void job_free(Job *job); Job* job_install(Job *j); int job_install_deserialized(Job *j); diff --git a/src/core/manager.c b/src/core/manager.c index 204fc8b819d..b4f197c2049 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3186,6 +3186,17 @@ finish: return r; } +static void manager_flush_finished_jobs(Manager *m) { + Job *j; + + while ((j = set_steal_first(m->pending_finished_jobs))) { + bus_job_send_removed_signal(j); + job_free(j); + } + + m->pending_finished_jobs = set_free(m->pending_finished_jobs); +} + int manager_reload(Manager *m) { int r, q; _cleanup_fclose_ FILE *f = NULL; @@ -3294,6 +3305,9 @@ int manager_reload(Manager *m) { if (q < 0 && r >= 0) r = q; + if (!MANAGER_IS_RELOADING(m)) + manager_flush_finished_jobs(m); + m->send_reloading_done = true; return r; diff --git a/src/core/manager.h b/src/core/manager.h index f41cce1c09e..1f97c15365c 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -300,6 +300,9 @@ struct Manager { /* non-zero if we are reloading or reexecuting, */ int n_reloading; + /* A set which contains all jobs that started before reload and finished + * during it */ + Set *pending_finished_jobs; unsigned n_installed_jobs; unsigned n_failed_jobs;