git.ipfire.org Git - thirdparty/tor.git/commitdiff
Revert "Rework of worker threads' start and exit + slight changes in cleanup code"
author     David Goulet <dgoulet@torproject.org>
           Tue, 3 Dec 2024 15:06:25 +0000 (10:06 -0500)
committer  David Goulet <dgoulet@torproject.org>
           Tue, 3 Dec 2024 15:06:25 +0000 (10:06 -0500)
This reverts commit 4f3aafa1e422e9aa005b74c8a1d40cc3e9143346.

Once merged upstream, the CI failed here with:

Warning: Failed to confirm worker threads' start up after timeout.
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(_start+0x2a) [0x56404d21ddaa] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(cpuworker_init+0x53) [0x56404d373d53] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(log_backtrace_impl+0x57) [0x56404d29e1f7] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(main+0x19) [0x56404d21dd59] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(run_tor_main_loop+0xce) [0x56404d22188e] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(threadpool_new+0x295) [0x56404d3e28f5] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(tor_assertion_failed_+0x148) [0x56404d2a9248] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(tor_main+0x49) [0x56404d21e179] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /builds/tpo/core/tor/build/src/app/tor(tor_run_main+0x1e5) [0x56404d221db5] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug:     /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xea) [0x7f6aa3b1cd7a] (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)
Warning: Bug: Tor 0.4.9.0-alpha-dev (git-4f3aafa1e422e9aa): Assertion 0 failed in threadpool_new at /builds/tpo/core/tor/src/lib/evloop/workqueue.c:641: . Stack trace: (on Tor 0.4.9.0-alpha-dev 4f3aafa1e422e9aa)

We are still figuring this out, but revert it for now so we can release an alpha.
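
For context on the warning above: the startup handshake that this commit reverts (the removed lines in threadpool_start_threads() and worker_thread_main() below) has the main thread wait on pool->condition under pool->control_lock with a 30-second timeout, while the last worker signals that same condition under pool->lock. The sketch below reproduces that shape with plain pthreads instead of Tor's tor_mutex_t/tor_cond_t wrappers; the names, the thread count, and the lost-wakeup reading are illustrative assumptions, not a confirmed diagnosis of the CI failure. If the last worker signals before the main thread has begun waiting, the wakeup is lost and the wait can only end by timeout, which would match the warning above.

/* Illustrative sketch only: plain pthreads stand-ins for the reverted
 * handshake (compile with -lpthread).  Names and the 4-thread count are
 * assumptions, not Tor code. */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define N_THREADS 4

static pthread_mutex_t pool_lock    = PTHREAD_MUTEX_INITIALIZER; /* stands in for pool->lock */
static pthread_mutex_t control_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for pool->control_lock */
static pthread_cond_t  started_cond = PTHREAD_COND_INITIALIZER;  /* stands in for pool->condition */
static int n_running = 0;

static void *worker_main(void *arg)
{
  (void)arg;
  /* Each worker reports "started" under pool_lock; the last one signals. */
  pthread_mutex_lock(&pool_lock);
  if (++n_running == N_THREADS)
    pthread_cond_signal(&started_cond);
  pthread_mutex_unlock(&pool_lock);
  /* ... the real worker loop would run here ... */
  return NULL;
}

int main(void)
{
  pthread_t thr[N_THREADS];
  for (int i = 0; i < N_THREADS; i++)
    pthread_create(&thr[i], NULL, worker_main, NULL);

  /* The main thread waits on the SAME condition under a DIFFERENT mutex
   * (control_lock), with no predicate re-check.  If every worker has already
   * signalled before this wait begins, the wakeup is lost and the only way
   * out is the 30-second timeout -- the "Failed to confirm worker threads'
   * start up after timeout" case. */
  struct timespec deadline;
  clock_gettime(CLOCK_REALTIME, &deadline);
  deadline.tv_sec += 30;

  pthread_mutex_lock(&control_lock);
  int rc = pthread_cond_timedwait(&started_cond, &control_lock, &deadline);
  pthread_mutex_unlock(&control_lock);

  printf(rc == 0 ? "startup confirmed\n" : "startup confirmation timed out\n");

  for (int i = 0; i < N_THREADS; i++)
    pthread_join(thr[i], NULL);
  return 0;
}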

changes/ticket40991 [deleted file]
src/lib/evloop/workqueue.c

diff --git a/changes/ticket40991 b/changes/ticket40991
deleted file mode 100644
index 3ee26ef..0000000
--- a/changes/ticket40991
+++ /dev/null
@@ -1,4 +0,0 @@
-  o Minor bugfixes (threads, memory):
-    - Rework start and exit of worker threads.
-      Improvements in cleanup of resources used by threads.
-      Fixes bug 40991; bugfix on 0.4.8.13.
diff --git a/src/lib/evloop/workqueue.c b/src/lib/evloop/workqueue.c
index e61c838b22e53abd4eadff576ac1fbb52a1e5a2e..17ab44e3ab08cbe00ed98be2ae5f4032ddbb9468 100644
--- a/src/lib/evloop/workqueue.c
+++ b/src/lib/evloop/workqueue.c
@@ -78,8 +78,6 @@ struct threadpool_t {
 
   /** Number of elements in threads. */
   int n_threads;
-  /** Number of elements to be created in threads. */
-  int n_threads_max;
   /** Mutex to protect all the above fields. */
   tor_mutex_t lock;
 
@@ -90,11 +88,6 @@ struct threadpool_t {
   void *(*new_thread_state_fn)(void*);
   void (*free_thread_state_fn)(void*);
   void *new_thread_state_arg;
-
-  /** Used for signalling the worker threads to exit. */
-  int exit;
-  /** Mutex for controlling worker threads' startup and exit. */
-  tor_mutex_t control_lock;
 };
 
 /** Used to put a workqueue_priority_t value into a bitfield. */
@@ -277,25 +270,13 @@ worker_thread_extract_next_work(workerthread_t *thread)
 static void
 worker_thread_main(void *thread_)
 {
-  static int n_worker_threads_running = 0;
   workerthread_t *thread = thread_;
   threadpool_t *pool = thread->in_pool;
   workqueue_entry_t *work;
   workqueue_reply_t result;
 
   tor_mutex_acquire(&pool->lock);
-  log_debug(LD_GENERAL, "Worker thread (TID: %lu) has started.",
-            tor_get_thread_id());
-
-  if (++n_worker_threads_running == pool->n_threads_max)
-    /* Let the main thread know, the last worker thread has started. */
-    tor_cond_signal_one(&pool->condition);
-
   while (1) {
-    /* Exit thread when signaled to exit */
-    if (pool->exit)
-      break;
-
     /* lock must be held at this point. */
     while (worker_thread_has_work(thread)) {
       /* lock must be held at this point. */
@@ -310,7 +291,7 @@ worker_thread_main(void *thread_)
         workqueue_reply_t r = update_fn(thread->state, arg);
 
         if (r != WQ_RPL_REPLY) {
-          break;
+          return;
         }
 
         tor_mutex_acquire(&pool->lock);
@@ -330,7 +311,7 @@ worker_thread_main(void *thread_)
 
       /* We may need to exit the thread. */
       if (result != WQ_RPL_REPLY) {
-        break;
+        return;
       }
       tor_mutex_acquire(&pool->lock);
     }
@@ -344,14 +325,6 @@ worker_thread_main(void *thread_)
       log_warn(LD_GENERAL, "Fail tor_cond_wait.");
     }
   }
-
-  log_debug(LD_GENERAL, "Worker thread (TID: %lu) has exited.",
-            tor_get_thread_id());
-
-  if (--n_worker_threads_running == 0)
-    tor_mutex_release(&pool->control_lock);
-
-  tor_mutex_release(&pool->lock);
 }
 
 /** Put a reply on the reply queue.  The reply must not currently be on
@@ -549,9 +522,6 @@ threadpool_start_threads(threadpool_t *pool, int n)
     pool->threads = tor_reallocarray(pool->threads,
                                      sizeof(workerthread_t*), n);
 
-  pool->n_threads_max = n;
-  log_debug(LD_GENERAL, "Starting worker threads...");
-
   while (pool->n_threads < n) {
     /* For half of our threads, we'll choose lower priorities permissively;
      * for the other half, we'll stick more strictly to higher priorities.
@@ -573,36 +543,9 @@ threadpool_start_threads(threadpool_t *pool, int n)
     thr->index = pool->n_threads;
     pool->threads[pool->n_threads++] = thr;
   }
-
   tor_mutex_release(&pool->lock);
-  tor_mutex_acquire(&pool->control_lock);
-
-  struct timeval tv = {.tv_sec = 30, .tv_usec = 0};
-
-  /* Wait for the last launched thread to confirm us, it has started.
-   * Wait max 30 seconds */
-  switch (tor_cond_wait(&pool->condition, &pool->control_lock, &tv)) {
-  case -1:
-    log_warn(LD_GENERAL, "Failed to confirm worker threads' start up.");
-    goto error;
-  case 1:
-    log_warn(LD_GENERAL, "Failed to confirm worker threads' "
-                         "start up after timeout.");
-    goto error;
-  case 0:
-    log_debug(LD_GENERAL, "Starting worker threads finished.");
-    break;
-  default:;
-  }
-
-  /* On success control lock stays locked. This is required for the
-   * main thread to wait for the worker threads to exit on shutdown. */
 
   return 0;
-
-error:
-  tor_mutex_release(&pool->control_lock);
-  return -1;
 }
 
 /**
@@ -623,9 +566,6 @@ threadpool_new(int n_threads,
   pool = tor_malloc_zero(sizeof(threadpool_t));
   tor_mutex_init_nonrecursive(&pool->lock);
   tor_cond_init(&pool->condition);
-  tor_mutex_init_nonrecursive(&pool->control_lock);
-  pool->exit = 0;
-
   unsigned i;
   for (i = WORKQUEUE_PRIORITY_FIRST; i <= WORKQUEUE_PRIORITY_LAST; ++i) {
     TOR_TAILQ_INIT(&pool->work[i]);
@@ -639,6 +579,8 @@ threadpool_new(int n_threads,
   if (threadpool_start_threads(pool, n_threads) < 0) {
     //LCOV_EXCL_START
     tor_assert_nonfatal_unreached();
+    tor_cond_uninit(&pool->condition);
+    tor_mutex_uninit(&pool->lock);
     threadpool_free(pool);
     return NULL;
     //LCOV_EXCL_STOP
@@ -656,32 +598,6 @@ threadpool_free_(threadpool_t *pool)
   if (!pool)
     return;
 
-  tor_mutex_acquire(&pool->lock);
-  /* Signal the worker threads to exit */
-  pool->exit = 1;
-  /* If worker threads are waiting for work, let them continue to exit */
-  tor_cond_signal_all(&pool->condition);
-
-  log_debug(LD_GENERAL, "Signaled worker threads to exit. "
-                        "Waiting for them to exit...");
-  tor_mutex_release(&pool->lock);
-
-  /* Wait until all worker threads have exited.
-   * pool->control_lock must be prelocked here. */
-  tor_mutex_acquire(&pool->control_lock);
-  /* Unlock required, else main thread hangs on
-   * tor_mutex_uninit(&pool->control_lock) */
-  tor_mutex_release(&pool->control_lock);
-
-  /* If this message appears in the log before all threads have confirmed
-   * their exit, then pool->control_lock wasn't prelocked for some reason. */
-  log_debug(LD_GENERAL, "All worker threads have exited. "
-                        "Beginning to clean up...");
-
-  tor_cond_uninit(&pool->condition);
-  tor_mutex_uninit(&pool->lock);
-  tor_mutex_uninit(&pool->control_lock);
-
   if (pool->threads) {
     for (int i = 0; i != pool->n_threads; ++i)
       workerthread_free(pool->threads[i]);
@@ -689,35 +605,21 @@ threadpool_free_(threadpool_t *pool)
     tor_free(pool->threads);
   }
 
-  if (pool->update_args) {
-    if (!pool->free_update_arg_fn)
-      log_warn(LD_GENERAL, "Freeing pool->update_args not possible. "
-                           "pool->free_update_arg_fn is not set.");
-    else
-      pool->free_update_arg_fn(pool->update_args);
-  }
+  if (pool->update_args)
+    pool->free_update_arg_fn(pool->update_args);
 
   if (pool->reply_event) {
-    if (tor_event_del(pool->reply_event) == -1)
-      log_warn(LD_GENERAL, "libevent error: deleting reply event failed.");
-    else
-      tor_event_free(pool->reply_event);
+    tor_event_del(pool->reply_event);
+    tor_event_free(pool->reply_event);
   }
 
   if (pool->reply_queue)
     replyqueue_free(pool->reply_queue);
 
-  if (pool->new_thread_state_arg) {
-    if (!pool->free_thread_state_fn)
-      log_warn(LD_GENERAL, "Freeing pool->new_thread_state_arg not possible. "
-                           "pool->free_thread_state_fn is not set.");
-    else
-      pool->free_thread_state_fn(pool->new_thread_state_arg);
-  }
+  if (pool->new_thread_state_arg)
+    pool->free_thread_state_fn(pool->new_thread_state_arg);
 
   tor_free(pool);
-
-  log_debug(LD_GENERAL, "Cleanup finished.");
 }
 
 /** Return the reply queue associated with a given thread pool. */