git.ipfire.org Git - thirdparty/qemu.git/commitdiff
replay: push replay_mutex_lock up the call tree
author Alex Bennée <alex.bennee@linaro.org>
Tue, 27 Feb 2018 09:52:48 +0000 (12:52 +0300)
committer Paolo Bonzini <pbonzini@redhat.com>
Mon, 12 Mar 2018 16:10:36 +0000 (17:10 +0100)
Instead of using the replay_lock only to guard the output of the log,
we now use it to protect the whole execution section. This replaces what
the BQL used to do when it was held during TCG execution.

We also introduce some rules for locking order - mainly that you
cannot take the replay_mutex while holding the BQL. This leads to some
slight sophistry during start-up, and to extending the
replay_mutex_destroy function so that it unlocks the mutex without
checking for the BQL condition, so it can be cleanly dropped in the
non-replay case.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Tested-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Message-Id: <20180227095248.1060.40374.stgit@pasha-VirtualBox>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
cpus.c
docs/replay.txt
replay/replay-audio.c
replay/replay-char.c
replay/replay-events.c
replay/replay-internal.c
replay/replay-time.c
replay/replay.c
util/main-loop.c
vl.c

diff --git a/cpus.c b/cpus.c
index c652da84cf4825225fb51b8bbca924a3ef35c0ef..2e6701795ba9835665126f0423876626d81a170b 100644 (file)
--- a/cpus.c
+++ b/cpus.c
@@ -1317,6 +1317,8 @@ static void prepare_icount_for_run(CPUState *cpu)
         insns_left = MIN(0xffff, cpu->icount_budget);
         cpu->icount_decr.u16.low = insns_left;
         cpu->icount_extra = cpu->icount_budget - insns_left;
+
+        replay_mutex_lock();
     }
 }
 
@@ -1332,6 +1334,8 @@ static void process_icount_data(CPUState *cpu)
         cpu->icount_budget = 0;
 
         replay_account_executed_instructions();
+
+        replay_mutex_unlock();
     }
 }
 
@@ -1346,11 +1350,9 @@ static int tcg_cpu_exec(CPUState *cpu)
 #ifdef CONFIG_PROFILER
     ti = profile_getclock();
 #endif
-    qemu_mutex_unlock_iothread();
     cpu_exec_start(cpu);
     ret = cpu_exec(cpu);
     cpu_exec_end(cpu);
-    qemu_mutex_lock_iothread();
 #ifdef CONFIG_PROFILER
     tcg_time += profile_getclock() - ti;
 #endif
@@ -1417,6 +1419,9 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
     cpu->exit_request = 1;
 
     while (1) {
+        qemu_mutex_unlock_iothread();
+        replay_mutex_lock();
+        qemu_mutex_lock_iothread();
         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
         qemu_account_warp_timer();
 
@@ -1425,6 +1430,8 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
          */
         handle_icount_deadline();
 
+        replay_mutex_unlock();
+
         if (!cpu) {
             cpu = first_cpu;
         }
@@ -1440,11 +1447,13 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
             if (cpu_can_run(cpu)) {
                 int r;
 
+                qemu_mutex_unlock_iothread();
                 prepare_icount_for_run(cpu);
 
                 r = tcg_cpu_exec(cpu);
 
                 process_icount_data(cpu);
+                qemu_mutex_lock_iothread();
 
                 if (r == EXCP_DEBUG) {
                     cpu_handle_guest_debug(cpu);
@@ -1634,7 +1643,9 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
     while (1) {
         if (cpu_can_run(cpu)) {
             int r;
+            qemu_mutex_unlock_iothread();
             r = tcg_cpu_exec(cpu);
+            qemu_mutex_lock_iothread();
             switch (r) {
             case EXCP_DEBUG:
                 cpu_handle_guest_debug(cpu);
@@ -1781,12 +1792,21 @@ void pause_all_vcpus(void)
         }
     }
 
+    /* We need to drop the replay_lock so any vCPU threads woken up
+     * can finish their replay tasks
+     */
+    replay_mutex_unlock();
+
     while (!all_vcpus_paused()) {
         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
         CPU_FOREACH(cpu) {
             qemu_cpu_kick(cpu);
         }
     }
+
+    qemu_mutex_unlock_iothread();
+    replay_mutex_lock();
+    qemu_mutex_lock_iothread();
 }
 
 void cpu_resume(CPUState *cpu)
index c52407fe23c8e95962d6a96364b3fb371a1949b3..959633e7ea8867a0887538551251dfeb165bb56a 100644 (file)
@@ -49,6 +49,28 @@ Modifications of qemu include:
  * recording/replaying user input (mouse and keyboard)
  * adding internal checkpoints for cpu and io synchronization
 
+Locking and thread synchronisation
+----------------------------------
+
+Previously the synchronisation of the main thread and the vCPU thread
+was ensured by the holding of the BQL. However the trend has been to
+reduce the time the BQL is held across the system, including under TCG
+system emulation. As it is important that batches of events are kept
+in sequence (e.g. expiring timers and checkpoints in the main thread
+while instruction checkpoints are written by the vCPU thread) we need
+another lock to keep things in lock-step. This role is now handled by
+the replay mutex (replay_mutex_lock()). It used to be held only for each
+event being written but now it is held for a whole execution period.
+This results in a deterministic ping-pong between the two main threads.
+
+As the BQL is now a finer-grained lock than the replay mutex, it is
+almost certainly a bug, and a source of deadlocks, to call
+replay_mutex_lock() while the BQL is held. This is enforced by an assert.
+While the unlocks are usually in the reverse order, this is not
+necessary; you can drop the replay mutex while holding the BQL, without
+doing a more complicated unlock_iothread/replay_unlock/lock_iothread
+sequence.
+
 Non-deterministic events
 ------------------------
 
index 3d837434d4952701e608e4b5c9feccea23f82ee3..b113836de497c6edea5f2bc838f4c1b5ec7e9b44 100644 (file)
 void replay_audio_out(int *played)
 {
     if (replay_mode == REPLAY_MODE_RECORD) {
+        g_assert(replay_mutex_locked());
         replay_save_instructions();
-        replay_mutex_lock();
         replay_put_event(EVENT_AUDIO_OUT);
         replay_put_dword(*played);
-        replay_mutex_unlock();
     } else if (replay_mode == REPLAY_MODE_PLAY) {
+        g_assert(replay_mutex_locked());
         replay_account_executed_instructions();
-        replay_mutex_lock();
         if (replay_next_event_is(EVENT_AUDIO_OUT)) {
             *played = replay_get_dword();
             replay_finish_event();
-            replay_mutex_unlock();
         } else {
-            replay_mutex_unlock();
             error_report("Missing audio out event in the replay log");
             abort();
         }
@@ -44,8 +41,8 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size)
     int pos;
     uint64_t left, right;
     if (replay_mode == REPLAY_MODE_RECORD) {
+        g_assert(replay_mutex_locked());
         replay_save_instructions();
-        replay_mutex_lock();
         replay_put_event(EVENT_AUDIO_IN);
         replay_put_dword(*recorded);
         replay_put_dword(*wpos);
@@ -55,10 +52,9 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size)
             replay_put_qword(left);
             replay_put_qword(right);
         }
-        replay_mutex_unlock();
     } else if (replay_mode == REPLAY_MODE_PLAY) {
+        g_assert(replay_mutex_locked());
         replay_account_executed_instructions();
-        replay_mutex_lock();
         if (replay_next_event_is(EVENT_AUDIO_IN)) {
             *recorded = replay_get_dword();
             *wpos = replay_get_dword();
@@ -69,9 +65,7 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size)
                 audio_sample_from_uint64(samples, pos, left, right);
             }
             replay_finish_event();
-            replay_mutex_unlock();
         } else {
-            replay_mutex_unlock();
             error_report("Missing audio in event in the replay log");
             abort();
         }
index cbf7c04a9f6b3c48cc1a15af355dfa9c343bcd72..736cc8c2e61de0ec03a94cad8a9b127332549869 100755 (executable)
@@ -96,25 +96,24 @@ void *replay_event_char_read_load(void)
 
 void replay_char_write_event_save(int res, int offset)
 {
+    g_assert(replay_mutex_locked());
+
     replay_save_instructions();
-    replay_mutex_lock();
     replay_put_event(EVENT_CHAR_WRITE);
     replay_put_dword(res);
     replay_put_dword(offset);
-    replay_mutex_unlock();
 }
 
 void replay_char_write_event_load(int *res, int *offset)
 {
+    g_assert(replay_mutex_locked());
+
     replay_account_executed_instructions();
-    replay_mutex_lock();
     if (replay_next_event_is(EVENT_CHAR_WRITE)) {
         *res = replay_get_dword();
         *offset = replay_get_dword();
         replay_finish_event();
-        replay_mutex_unlock();
     } else {
-        replay_mutex_unlock();
         error_report("Missing character write event in the replay log");
         exit(1);
     }
@@ -122,23 +121,21 @@ void replay_char_write_event_load(int *res, int *offset)
 
 int replay_char_read_all_load(uint8_t *buf)
 {
-    replay_mutex_lock();
+    g_assert(replay_mutex_locked());
+
     if (replay_next_event_is(EVENT_CHAR_READ_ALL)) {
         size_t size;
         int res;
         replay_get_array(buf, &size);
         replay_finish_event();
-        replay_mutex_unlock();
         res = (int)size;
         assert(res >= 0);
         return res;
     } else if (replay_next_event_is(EVENT_CHAR_READ_ALL_ERROR)) {
         int res = replay_get_dword();
         replay_finish_event();
-        replay_mutex_unlock();
         return res;
     } else {
-        replay_mutex_unlock();
         error_report("Missing character read all event in the replay log");
         exit(1);
     }
@@ -146,19 +143,17 @@ int replay_char_read_all_load(uint8_t *buf)
 
 void replay_char_read_all_save_error(int res)
 {
+    g_assert(replay_mutex_locked());
     assert(res < 0);
     replay_save_instructions();
-    replay_mutex_lock();
     replay_put_event(EVENT_CHAR_READ_ALL_ERROR);
     replay_put_dword(res);
-    replay_mutex_unlock();
 }
 
 void replay_char_read_all_save_buf(uint8_t *buf, int offset)
 {
+    g_assert(replay_mutex_locked());
     replay_save_instructions();
-    replay_mutex_lock();
     replay_put_event(EVENT_CHAR_READ_ALL);
     replay_put_array(buf, offset);
-    replay_mutex_unlock();
 }
index e858254074f3ae216a31c554151ee711a668f89a..54dd9d260680721cafab4a1a96c00b9b9a524dcb 100644 (file)
@@ -79,16 +79,14 @@ bool replay_has_events(void)
 
 void replay_flush_events(void)
 {
-    replay_mutex_lock();
+    g_assert(replay_mutex_locked());
+
     while (!QTAILQ_EMPTY(&events_list)) {
         Event *event = QTAILQ_FIRST(&events_list);
-        replay_mutex_unlock();
         replay_run_event(event);
-        replay_mutex_lock();
         QTAILQ_REMOVE(&events_list, event, events);
         g_free(event);
     }
-    replay_mutex_unlock();
 }
 
 void replay_disable_events(void)
@@ -102,14 +100,14 @@ void replay_disable_events(void)
 
 void replay_clear_events(void)
 {
-    replay_mutex_lock();
+    g_assert(replay_mutex_locked());
+
     while (!QTAILQ_EMPTY(&events_list)) {
         Event *event = QTAILQ_FIRST(&events_list);
         QTAILQ_REMOVE(&events_list, event, events);
 
         g_free(event);
     }
-    replay_mutex_unlock();
 }
 
 /*! Adds specified async event to the queue */
@@ -136,9 +134,8 @@ void replay_add_event(ReplayAsyncEventKind event_kind,
     event->opaque2 = opaque2;
     event->id = id;
 
-    replay_mutex_lock();
+    g_assert(replay_mutex_locked());
     QTAILQ_INSERT_TAIL(&events_list, event, events);
-    replay_mutex_unlock();
 }
 
 void replay_bh_schedule_event(QEMUBH *bh)
@@ -207,13 +204,11 @@ static void replay_save_event(Event *event, int checkpoint)
 /* Called with replay mutex locked */
 void replay_save_events(int checkpoint)
 {
+    g_assert(replay_mutex_locked());
     while (!QTAILQ_EMPTY(&events_list)) {
         Event *event = QTAILQ_FIRST(&events_list);
         replay_save_event(event, checkpoint);
-
-        replay_mutex_unlock();
         replay_run_event(event);
-        replay_mutex_lock();
         QTAILQ_REMOVE(&events_list, event, events);
         g_free(event);
     }
@@ -292,6 +287,7 @@ static Event *replay_read_event(int checkpoint)
 /* Called with replay mutex locked */
 void replay_read_events(int checkpoint)
 {
+    g_assert(replay_mutex_locked());
     while (replay_state.data_kind == EVENT_ASYNC) {
         Event *event = replay_read_event(checkpoint);
         if (!event) {
@@ -299,9 +295,7 @@ void replay_read_events(int checkpoint)
         }
         replay_finish_event();
         read_event_kind = -1;
-        replay_mutex_unlock();
         replay_run_event(event);
-        replay_mutex_lock();
 
         g_free(event);
     }
index fa7bba6dfdb48b21beb5de689a6f6163d818e76e..8e7474f787de99144c0b3ff9580cacd0ae63ee9d 100644 (file)
@@ -174,6 +174,9 @@ static __thread bool replay_locked;
 void replay_mutex_init(void)
 {
     qemu_mutex_init(&lock);
+    /* Hold the mutex while we start-up */
+    qemu_mutex_lock(&lock);
+    replay_locked = true;
 }
 
 bool replay_mutex_locked(void)
@@ -181,25 +184,31 @@ bool replay_mutex_locked(void)
     return replay_locked;
 }
 
+/* Ordering constraint: the replay mutex must be taken before the BQL */
 void replay_mutex_lock(void)
 {
-    g_assert(!replay_mutex_locked());
-    qemu_mutex_lock(&lock);
-    replay_locked = true;
+    if (replay_mode != REPLAY_MODE_NONE) {
+        g_assert(!qemu_mutex_iothread_locked());
+        g_assert(!replay_mutex_locked());
+        qemu_mutex_lock(&lock);
+        replay_locked = true;
+    }
 }
 
 void replay_mutex_unlock(void)
 {
-    g_assert(replay_mutex_locked());
-    replay_locked = false;
-    qemu_mutex_unlock(&lock);
+    if (replay_mode != REPLAY_MODE_NONE) {
+        g_assert(replay_mutex_locked());
+        replay_locked = false;
+        qemu_mutex_unlock(&lock);
+    }
 }
 
 /*! Saves cached instructions. */
 void replay_save_instructions(void)
 {
     if (replay_file && replay_mode == REPLAY_MODE_RECORD) {
-        replay_mutex_lock();
+        g_assert(replay_mutex_locked());
         int diff = (int)(replay_get_current_step() - replay_state.current_step);
 
         /* Time can only go forward */
@@ -210,6 +219,5 @@ void replay_save_instructions(void)
             replay_put_dword(diff);
             replay_state.current_step += diff;
         }
-        replay_mutex_unlock();
     }
 }
index f70382a88f3c9fa7c387a90765932747f04319b3..6a7565ec8d075f861d641b9887811ece7910f9e0 100644 (file)
 
 int64_t replay_save_clock(ReplayClockKind kind, int64_t clock)
 {
-    replay_save_instructions();
 
     if (replay_file) {
-        replay_mutex_lock();
+        g_assert(replay_mutex_locked());
+
+        replay_save_instructions();
         replay_put_event(EVENT_CLOCK + kind);
         replay_put_qword(clock);
-        replay_mutex_unlock();
     }
 
     return clock;
@@ -46,16 +46,16 @@ void replay_read_next_clock(ReplayClockKind kind)
 /*! Reads next clock event from the input. */
 int64_t replay_read_clock(ReplayClockKind kind)
 {
+    g_assert(replay_file && replay_mutex_locked());
+
     replay_account_executed_instructions();
 
     if (replay_file) {
         int64_t ret;
-        replay_mutex_lock();
         if (replay_next_event_is(EVENT_CLOCK + kind)) {
             replay_read_next_clock(kind);
         }
         ret = replay_state.cached_clock[kind];
-        replay_mutex_unlock();
 
         return ret;
     }
index 5d05ee0460101bd624598e1359f89182b89749b9..90f98b749024b583813d415498812628cbc1a37a 100644 (file)
@@ -81,7 +81,7 @@ int replay_get_instructions(void)
 void replay_account_executed_instructions(void)
 {
     if (replay_mode == REPLAY_MODE_PLAY) {
-        replay_mutex_lock();
+        g_assert(replay_mutex_locked());
         if (replay_state.instructions_count > 0) {
             int count = (int)(replay_get_current_step()
                               - replay_state.current_step);
@@ -100,24 +100,22 @@ void replay_account_executed_instructions(void)
                 qemu_notify_event();
             }
         }
-        replay_mutex_unlock();
     }
 }
 
 bool replay_exception(void)
 {
+
     if (replay_mode == REPLAY_MODE_RECORD) {
+        g_assert(replay_mutex_locked());
         replay_save_instructions();
-        replay_mutex_lock();
         replay_put_event(EVENT_EXCEPTION);
-        replay_mutex_unlock();
         return true;
     } else if (replay_mode == REPLAY_MODE_PLAY) {
+        g_assert(replay_mutex_locked());
         bool res = replay_has_exception();
         if (res) {
-            replay_mutex_lock();
             replay_finish_event();
-            replay_mutex_unlock();
         }
         return res;
     }
@@ -129,10 +127,9 @@ bool replay_has_exception(void)
 {
     bool res = false;
     if (replay_mode == REPLAY_MODE_PLAY) {
+        g_assert(replay_mutex_locked());
         replay_account_executed_instructions();
-        replay_mutex_lock();
         res = replay_next_event_is(EVENT_EXCEPTION);
-        replay_mutex_unlock();
     }
 
     return res;
@@ -141,17 +138,15 @@ bool replay_has_exception(void)
 bool replay_interrupt(void)
 {
     if (replay_mode == REPLAY_MODE_RECORD) {
+        g_assert(replay_mutex_locked());
         replay_save_instructions();
-        replay_mutex_lock();
         replay_put_event(EVENT_INTERRUPT);
-        replay_mutex_unlock();
         return true;
     } else if (replay_mode == REPLAY_MODE_PLAY) {
+        g_assert(replay_mutex_locked());
         bool res = replay_has_interrupt();
         if (res) {
-            replay_mutex_lock();
             replay_finish_event();
-            replay_mutex_unlock();
         }
         return res;
     }
@@ -163,10 +158,9 @@ bool replay_has_interrupt(void)
 {
     bool res = false;
     if (replay_mode == REPLAY_MODE_PLAY) {
+        g_assert(replay_mutex_locked());
         replay_account_executed_instructions();
-        replay_mutex_lock();
         res = replay_next_event_is(EVENT_INTERRUPT);
-        replay_mutex_unlock();
     }
     return res;
 }
@@ -174,9 +168,8 @@ bool replay_has_interrupt(void)
 void replay_shutdown_request(ShutdownCause cause)
 {
     if (replay_mode == REPLAY_MODE_RECORD) {
-        replay_mutex_lock();
+        g_assert(replay_mutex_locked());
         replay_put_event(EVENT_SHUTDOWN + cause);
-        replay_mutex_unlock();
     }
 }
 
@@ -190,9 +183,9 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint)
         return true;
     }
 
-    replay_mutex_lock();
 
     if (replay_mode == REPLAY_MODE_PLAY) {
+        g_assert(replay_mutex_locked());
         if (replay_next_event_is(EVENT_CHECKPOINT + checkpoint)) {
             replay_finish_event();
         } else if (replay_state.data_kind != EVENT_ASYNC) {
@@ -205,12 +198,12 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint)
            checkpoint were processed */
         res = replay_state.data_kind != EVENT_ASYNC;
     } else if (replay_mode == REPLAY_MODE_RECORD) {
+        g_assert(replay_mutex_locked());
         replay_put_event(EVENT_CHECKPOINT + checkpoint);
         replay_save_events(checkpoint);
         res = true;
     }
 out:
-    replay_mutex_unlock();
     return res;
 }
 
@@ -233,8 +226,6 @@ static void replay_enable(const char *fname, int mode)
 
     atexit(replay_finish);
 
-    replay_mutex_init();
-
     replay_file = fopen(fname, fmode);
     if (replay_file == NULL) {
         fprintf(stderr, "Replay: open %s: %s\n", fname, strerror(errno));
@@ -242,8 +233,9 @@ static void replay_enable(const char *fname, int mode)
     }
 
     replay_filename = g_strdup(fname);
-
     replay_mode = mode;
+    replay_mutex_init();
+
     replay_state.data_kind = -1;
     replay_state.instructions_count = 0;
     replay_state.current_step = 0;
index 7558eb5f5323cfb6c5b7c53d5314df941c1ff666..992f9b0f3457fd959eb975df0165ec34aa47676a 100644 (file)
@@ -29,6 +29,7 @@
 #include "qemu/sockets.h"      // struct in_addr needed for libslirp.h
 #include "sysemu/qtest.h"
 #include "sysemu/cpus.h"
+#include "sysemu/replay.h"
 #include "slirp/libslirp.h"
 #include "qemu/main-loop.h"
 #include "block/aio.h"
@@ -245,18 +246,19 @@ static int os_host_main_loop_wait(int64_t timeout)
         timeout = SCALE_MS;
     }
 
+
     if (timeout) {
         spin_counter = 0;
-        qemu_mutex_unlock_iothread();
     } else {
         spin_counter++;
     }
+    qemu_mutex_unlock_iothread();
+    replay_mutex_unlock();
 
     ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
 
-    if (timeout) {
-        qemu_mutex_lock_iothread();
-    }
+    replay_mutex_lock();
+    qemu_mutex_lock_iothread();
 
     glib_pollfds_poll();
 
@@ -463,8 +465,13 @@ static int os_host_main_loop_wait(int64_t timeout)
     poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);
 
     qemu_mutex_unlock_iothread();
+
+    replay_mutex_unlock();
+
     g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);
 
+    replay_mutex_lock();
+
     qemu_mutex_lock_iothread();
     if (g_poll_ret > 0) {
         for (i = 0; i < w->num; i++) {
diff --git a/vl.c b/vl.c
index e81152417a541c91d3c7fea5406d3000b93f9dab..5925a4b5022703d885220ea49aecbc1bab4a18b0 100644 (file)
--- a/vl.c
+++ b/vl.c
@@ -3058,6 +3058,7 @@ int main(int argc, char **argv, char **envp)
 
     qemu_init_cpu_list();
     qemu_init_cpu_loop();
+
     qemu_mutex_lock_iothread();
 
     atexit(qemu_run_exit_notifiers);