From ec8126d72378a25152dc7e4a80b65fc9d12f951e Mon Sep 17 00:00:00 2001
From: =?utf8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
Date: Wed, 19 Dec 2018 11:32:26 +0100
Subject: [PATCH] Revert "core/mount: minimize impact on mount storm."

This reverts commit 89f9752ea08f516b5d77f8e577bb772073c70c01.

This patch causes various problems during boot, where a "mount storm" occurs
naturally. Current approach is flakey, and it seems very risky to push a
feature like this which impacts boot right before a release. So let's revert
for now, and consider a more robust solution after later.

Fixes #11209.

> https://github.com/systemd/systemd/pull/11196#issuecomment-448523186:
"Reverting 89f9752ea08f516b5d77f8e577bb772073c70c01 and fcfb1f775ed0e9d282607bb118ba788b98952855 fixes this test."
---
 src/core/manager.h |  3 --
 src/core/mount.c   | 80 ++++------------------------------------------
 2 files changed, 7 insertions(+), 76 deletions(-)

diff --git a/src/core/manager.h b/src/core/manager.h
index 9f8fc46434e..bce8020cfd4 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -227,9 +227,6 @@ struct Manager {
         /* Data specific to the mount subsystem */
         struct libmnt_monitor *mount_monitor;
         sd_event_source *mount_event_source;
-        sd_event_source *mount_timeout_source;
-        usec_t mount_last_read_usec;
-        usec_t mount_last_duration_usec;
 
         /* Data specific to the swap filesystem */
         FILE *proc_swaps;
diff --git a/src/core/mount.c b/src/core/mount.c
index cfdcc6e6f54..ead9bc1f441 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -55,7 +55,6 @@ static const UnitActiveState state_translation_table[_MOUNT_STATE_MAX] = {
 
 static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
 static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
-static int mount_dispatch_proc_self_mountinfo_timer(sd_event_source *source, usec_t usec, void *userdata);
 
 static bool MOUNT_STATE_WITH_PROCESS(MountState state) {
         return IN_SET(state,
@@ -1666,7 +1665,6 @@ static int mount_load_proc_self_mountinfo(Manager *m, bool set_flags) {
 static void mount_shutdown(Manager *m) {
         assert(m);
 
-        m->mount_timeout_source = sd_event_source_unref(m->mount_timeout_source);
         m->mount_event_source = sd_event_source_unref(m->mount_event_source);
 
         mnt_unref_monitor(m->mount_monitor);
@@ -1782,50 +1780,13 @@ fail:
         mount_shutdown(m);
 }
 
-static void mount_process_proc_self_mountinfo(Manager *m);
-
 static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+        _cleanup_set_free_free_ Set *around = NULL, *gone = NULL;
         Manager *m = userdata;
+        const char *what;
+        Iterator i;
+        Unit *u;
         int r;
-        usec_t next_read = usec_add(m->mount_last_read_usec,
-                                    m->mount_last_duration_usec * 10);
-
-        if (now(CLOCK_MONOTONIC) < next_read) {
-                /* The (current) API for getting mount events from the Linux kernel
-                 * involves getting a "something changed" notification, and then having
-                 * to re-read the entire /proc/self/mountinfo file.  When there are lots
-                 * of mountpoints, this file is large and parsing it can take noticeable
-                 * time.  As most of the file won't have changed, this can be seen as wasted time.
-                 * If there is a "mount storm" such as 1000 mount points being created
-                 * in quick succession, this will result in 1000 successive notification.
-                 * If we respond to every notification, we will do quadratically more work
-                 * than if we respond just once after all the notifications have arrived.
-                 * In this (pathological) case, a delay in scheduling would actually
-                 * improve throughput as we would combine notifications and parse
-                 * the file less often.  We cannot expect the scheduler to notice
-                 * this pathology without help.
-                 * So when the rate of notifications means we are spending more than
-                 * 10% of real time handling them, we set a timer and stop listening
-                 * to notifications for a while.
-                 * If/when Linux provides an API which provides only details of what
-                 * has changed, this rate-limiting can be removed.
-                 */
-
-                r = sd_event_source_set_enabled(source, SD_EVENT_OFF);
-                if (r < 0)
-                        log_warning_errno(r, "Failed to disable monitoring of /proc/self/mounting, ignoring: %m");
-                if (!m->mount_timeout_source) {
-                        r = sd_event_add_time(m->event, &m->mount_timeout_source,
-                                              CLOCK_MONOTONIC,
-                                              next_read,
-                                              0,
-                                              mount_dispatch_proc_self_mountinfo_timer,
-                                              m);
-                        if (r < 0)
-                                log_warning_errno(r, "Failed to set timeout to reread /proc/self/mounting, ignoring: %m");
-                }
-                return 0;
-        }
 
         assert(m);
         assert(revents & EPOLLIN);
@@ -1853,40 +1814,13 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
                         return 0;
         }
 
-        mount_process_proc_self_mountinfo(m);
-        return 0;
-}
-
-static int mount_dispatch_proc_self_mountinfo_timer(sd_event_source *source, usec_t usec, void *userdata) {
-        Manager *m = userdata;
-        int r;
-
-        r = sd_event_source_set_enabled(m->mount_event_source, SD_EVENT_ON);
-        if (r < 0)
-                log_warning_errno(r, "Failed to reenable /proc/self/mountinfo monitor, ignoring: %m");
-        m->mount_timeout_source = sd_event_source_unref(source);
-        mount_process_proc_self_mountinfo(m);
-        return 0;
-}
-
-static void mount_process_proc_self_mountinfo(Manager *m) {
-        _cleanup_set_free_free_ Set *around = NULL, *gone = NULL;
-        const char *what;
-        Iterator i;
-        Unit *u;
-        int r;
-
-        m->mount_last_read_usec = now(CLOCK_MONOTONIC);
-        /* If an error occurs, assume 10ms */
-        m->mount_last_duration_usec = 10 * USEC_PER_MSEC;
-
         r = mount_load_proc_self_mountinfo(m, true);
         if (r < 0) {
                 /* Reset flags, just in case, for later calls */
                 LIST_FOREACH(units_by_type, u, m->units_by_type[UNIT_MOUNT])
                         MOUNT(u)->proc_flags = 0;
 
-                return;
+                return 0;
         }
 
         manager_dispatch_load_queue(m);
@@ -1974,8 +1908,8 @@ static void mount_process_proc_self_mountinfo(Manager *m) {
                 /* Let the device units know that the device is no longer mounted */
                 device_found_node(m, what, 0, DEVICE_FOUND_MOUNT);
         }
-        m->mount_last_duration_usec = usec_sub_unsigned(now(CLOCK_MONOTONIC),
-                                                        m->mount_last_read_usec);
+
+        return 0;
 }
 
 static void mount_reset_failed(Unit *u) {
-- 
2.39.2