]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
um: insert scheduler ticks when userspace does not yield
authorBenjamin Berg <benjamin.berg@intel.com>
Thu, 10 Oct 2024 14:25:37 +0000 (16:25 +0200)
committerJohannes Berg <johannes.berg@intel.com>
Wed, 23 Oct 2024 07:52:49 +0000 (09:52 +0200)
In time-travel mode userspace can do a lot of work without any time
passing. Unfortunately, this can result in OOM situations as the RCU
core code will never be run.

Work around this by keeping track of userspace processes that do not
yield for a large number of operations. When this happens, add a jiffy
to sched_clock to account time against the process and cause the
bookkeeping to run.

As sched_clock is used for tracing, it is useful to keep it in sync
between the different VMs. To that end, each tick of the actual clock
removes one of the previously added ticks again.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Link: https://patch.msgid.link/20241010142537.1134685-1-benjamin@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
arch/um/Kconfig
arch/um/include/shared/common-offsets.h
arch/um/kernel/time.c
arch/um/os-Linux/skas/process.c

index 448454a3d8b574a09a486553ed40e0366cbba2dc..5dc702ad9e7aa6f3279f27d99bc7e43b66e028ef 100644 (file)
@@ -227,6 +227,21 @@ config UML_TIME_TRAVEL_SUPPORT
 
          It is safe to say Y, but you probably don't need this.
 
+config UML_MAX_USERSPACE_ITERATIONS
+       int
+       prompt "Maximum number of unscheduled userspace iterations"
+       default 10000
+       depends on UML_TIME_TRAVEL_SUPPORT
+       help
+         In UML inf-cpu and ext time-travel mode userspace can run without being
+         interrupted. This will eventually overwhelm the kernel and create OOM
+         situations (mainly RCU not running). This setting specifies the number
+         of kernel/userspace switches (minor/major page fault, signal or syscall)
+         for the same userspace thread before sched_clock is advanced by a
+         jiffy to trigger scheduling.
+
+         Setting it to zero disables the feature.
+
 config KASAN_SHADOW_OFFSET
        hex
        depends on KASAN
index 579ed946a3a9a11c4f40df86ebd09a48ce1f23a3..86537e20942a672a639c88aebb9348cc6f4ee367 100644 (file)
@@ -28,4 +28,8 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
 DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
 #endif
-
+#ifdef CONFIG_UML_MAX_USERSPACE_ITERATIONS
+DEFINE(UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS, CONFIG_UML_MAX_USERSPACE_ITERATIONS);
+#else
+DEFINE(UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS, 0);
+#endif
index 29b27b90581fbb06365b4b7e2962698c3bfdc15e..1394568c02106fdd706aad69387571baff803fa1 100644 (file)
@@ -25,6 +25,8 @@
 #include <shared/init.h>
 
 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+#include <linux/sched/clock.h>
+
 enum time_travel_mode time_travel_mode;
 EXPORT_SYMBOL_GPL(time_travel_mode);
 
@@ -47,6 +49,15 @@ static u16 time_travel_shm_id;
 static struct um_timetravel_schedshm *time_travel_shm;
 static union um_timetravel_schedshm_client *time_travel_shm_client;
 
+unsigned long tt_extra_sched_jiffies;
+
+notrace unsigned long long sched_clock(void)
+{
+       return (unsigned long long)(jiffies - INITIAL_JIFFIES +
+                                   tt_extra_sched_jiffies)
+                                       * (NSEC_PER_SEC / HZ);
+}
+
 static void time_travel_set_time(unsigned long long ns)
 {
        if (unlikely(ns < time_travel_time))
@@ -443,6 +454,11 @@ static void time_travel_periodic_timer(struct time_travel_event *e)
 {
        time_travel_add_event(&time_travel_timer_event,
                              time_travel_time + time_travel_timer_interval);
+
+       /* clock tick; drop one extra jiffy to keep sched_clock constant */
+       if (tt_extra_sched_jiffies > 0)
+               tt_extra_sched_jiffies -= 1;
+
        deliver_alarm();
 }
 
@@ -594,6 +610,10 @@ EXPORT_SYMBOL_GPL(time_travel_add_irq_event);
 
 static void time_travel_oneshot_timer(struct time_travel_event *e)
 {
+       /* clock tick; drop one extra jiffy to keep sched_clock constant */
+       if (tt_extra_sched_jiffies > 0)
+               tt_extra_sched_jiffies -= 1;
+
        deliver_alarm();
 }
 
index 8b328eb9d1f783155d36cea5eabd1573b3a5070b..97856955e892ac613b188f042667df8e81bfd03a 100644 (file)
@@ -388,6 +388,9 @@ int start_userspace(unsigned long stub_stack)
        return err;
 }
 
+int unscheduled_userspace_iterations;
+extern unsigned long tt_extra_sched_jiffies;
+
 void userspace(struct uml_pt_regs *regs)
 {
        int err, status, op, pid = userspace_pid[0];
@@ -397,6 +400,27 @@ void userspace(struct uml_pt_regs *regs)
        interrupt_end();
 
        while (1) {
+               /*
+                * When we are in time-travel mode, userspace can theoretically
+                * do a *lot* of work without being scheduled. The problem with
+                * this is that it will prevent kernel bookkeeping (primarily
+                * the RCU) from running and this can for example cause OOM
+                * situations.
+                *
+                * This code accounts a jiffy against the scheduling clock
+                * once the same thread exceeds the configured number of
+                * userspace iterations, which prevents that situation.
+                */
+               if (time_travel_mode == TT_MODE_INFCPU ||
+                   time_travel_mode == TT_MODE_EXTERNAL) {
+                       if (UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS &&
+                           unscheduled_userspace_iterations++ >
+                           UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS) {
+                               tt_extra_sched_jiffies += 1;
+                               unscheduled_userspace_iterations = 0;
+                       }
+               }
+
                time_travel_print_bc_msg();
 
                current_mm_sync();
@@ -539,6 +563,8 @@ void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
 
 void switch_threads(jmp_buf *me, jmp_buf *you)
 {
+       unscheduled_userspace_iterations = 0;
+
        if (UML_SETJMP(me) == 0)
                UML_LONGJMP(you, 1);
 }