mm: memcg: move cgroup v1 oom handling code into memcontrol-v1.c
author		Roman Gushchin <roman.gushchin@linux.dev>
		Tue, 25 Jun 2024 00:59:00 +0000 (17:59 -0700)
committer	Andrew Morton <akpm@linux-foundation.org>
		Fri, 5 Jul 2024 01:05:53 +0000 (18:05 -0700)
Cgroup v1 supports a complicated mechanism for handling OOM in userspace,
which is not supported by cgroup v2.  Let's move the corresponding code
into memcontrol-v1.c.

Aside from the mechanical code movement, this patch introduces two new
functions: memcg1_oom_prepare() and memcg1_oom_finish().  They implement
the cgroup v1-specific parts of the common memcg OOM handling path, as
sketched below.
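
For illustration, a condensed sketch of how the common charge-time OOM path
in mm/memcontrol.c calls the new helpers after this change (simplified from
the diff below; declarations and surrounding context are trimmed):

  static bool mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
  {
  	bool locked, ret;

  	memcg_memory_event(memcg, MEMCG_OOM);

  	/* cgroup v1 may hand the OOM off to userspace; bail out of the charge path */
  	if (!memcg1_oom_prepare(memcg, &locked))
  		return false;

  	ret = mem_cgroup_out_of_memory(memcg, mask, order);

  	memcg1_oom_finish(memcg, locked);

  	return ret;
  }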

Link: https://lkml.kernel.org/r/20240625005906.106920-9-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/memcontrol-v1.c
mm/memcontrol-v1.h
mm/memcontrol.c

diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index d7b5c4c147324310a492f8c2218eee4f3d723291..253d49d5fb12cb4ef4ae71dbcdf2ff5ad893ca45 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -110,7 +110,13 @@ struct mem_cgroup_event {
        struct work_struct remove;
 };
 
-extern spinlock_t memcg_oom_lock;
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map memcg_oom_lock_dep_map = {
+       .name = "memcg_oom_lock",
+};
+#endif
+
+DEFINE_SPINLOCK(memcg_oom_lock);
 
 static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
                                         struct mem_cgroup_tree_per_node *mctz,
@@ -1469,7 +1475,7 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
        return 0;
 }
 
-void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
+static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter;
 
@@ -1959,6 +1965,225 @@ void memcg1_css_offline(struct mem_cgroup *memcg)
        spin_unlock_irq(&memcg->event_list_lock);
 }
 
+/*
+ * Check OOM-Killer is already running under our hierarchy.
+ * If someone is running, return false.
+ */
+static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg)
+{
+       struct mem_cgroup *iter, *failed = NULL;
+
+       spin_lock(&memcg_oom_lock);
+
+       for_each_mem_cgroup_tree(iter, memcg) {
+               if (iter->oom_lock) {
+                       /*
+                        * this subtree of our hierarchy is already locked
+                        * so we cannot give a lock.
+                        */
+                       failed = iter;
+                       mem_cgroup_iter_break(memcg, iter);
+                       break;
+               } else
+                       iter->oom_lock = true;
+       }
+
+       if (failed) {
+               /*
+                * OK, we failed to lock the whole subtree so we have
+                * to clean up what we set up to the failing subtree
+                */
+               for_each_mem_cgroup_tree(iter, memcg) {
+                       if (iter == failed) {
+                               mem_cgroup_iter_break(memcg, iter);
+                               break;
+                       }
+                       iter->oom_lock = false;
+               }
+       } else
+               mutex_acquire(&memcg_oom_lock_dep_map, 0, 1, _RET_IP_);
+
+       spin_unlock(&memcg_oom_lock);
+
+       return !failed;
+}
+
+static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
+{
+       struct mem_cgroup *iter;
+
+       spin_lock(&memcg_oom_lock);
+       mutex_release(&memcg_oom_lock_dep_map, _RET_IP_);
+       for_each_mem_cgroup_tree(iter, memcg)
+               iter->oom_lock = false;
+       spin_unlock(&memcg_oom_lock);
+}
+
+static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
+{
+       struct mem_cgroup *iter;
+
+       spin_lock(&memcg_oom_lock);
+       for_each_mem_cgroup_tree(iter, memcg)
+               iter->under_oom++;
+       spin_unlock(&memcg_oom_lock);
+}
+
+static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
+{
+       struct mem_cgroup *iter;
+
+       /*
+        * Be careful about under_oom underflows because a child memcg
+        * could have been added after mem_cgroup_mark_under_oom.
+        */
+       spin_lock(&memcg_oom_lock);
+       for_each_mem_cgroup_tree(iter, memcg)
+               if (iter->under_oom > 0)
+                       iter->under_oom--;
+       spin_unlock(&memcg_oom_lock);
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
+
+struct oom_wait_info {
+       struct mem_cgroup *memcg;
+       wait_queue_entry_t      wait;
+};
+
+static int memcg_oom_wake_function(wait_queue_entry_t *wait,
+       unsigned mode, int sync, void *arg)
+{
+       struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
+       struct mem_cgroup *oom_wait_memcg;
+       struct oom_wait_info *oom_wait_info;
+
+       oom_wait_info = container_of(wait, struct oom_wait_info, wait);
+       oom_wait_memcg = oom_wait_info->memcg;
+
+       if (!mem_cgroup_is_descendant(wake_memcg, oom_wait_memcg) &&
+           !mem_cgroup_is_descendant(oom_wait_memcg, wake_memcg))
+               return 0;
+       return autoremove_wake_function(wait, mode, sync, arg);
+}
+
+void memcg_oom_recover(struct mem_cgroup *memcg)
+{
+       /*
+        * For the following lockless ->under_oom test, the only required
+        * guarantee is that it must see the state asserted by an OOM when
+        * this function is called as a result of userland actions
+        * triggered by the notification of the OOM.  This is trivially
+        * achieved by invoking mem_cgroup_mark_under_oom() before
+        * triggering notification.
+        */
+       if (memcg && memcg->under_oom)
+               __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
+}
+
+/**
+ * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ * @handle: actually kill/wait or just clean up the OOM state
+ *
+ * This has to be called at the end of a page fault if the memcg OOM
+ * handler was enabled.
+ *
+ * Memcg supports userspace OOM handling where failed allocations must
+ * sleep on a waitqueue until the userspace task resolves the
+ * situation.  Sleeping directly in the charge context with all kinds
+ * of locks held is not a good idea, instead we remember an OOM state
+ * in the task and mem_cgroup_oom_synchronize() has to be called at
+ * the end of the page fault to complete the OOM handling.
+ *
+ * Returns %true if an ongoing memcg OOM situation was detected and
+ * completed, %false otherwise.
+ */
+bool mem_cgroup_oom_synchronize(bool handle)
+{
+       struct mem_cgroup *memcg = current->memcg_in_oom;
+       struct oom_wait_info owait;
+       bool locked;
+
+       /* OOM is global, do not handle */
+       if (!memcg)
+               return false;
+
+       if (!handle)
+               goto cleanup;
+
+       owait.memcg = memcg;
+       owait.wait.flags = 0;
+       owait.wait.func = memcg_oom_wake_function;
+       owait.wait.private = current;
+       INIT_LIST_HEAD(&owait.wait.entry);
+
+       prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
+       mem_cgroup_mark_under_oom(memcg);
+
+       locked = mem_cgroup_oom_trylock(memcg);
+
+       if (locked)
+               mem_cgroup_oom_notify(memcg);
+
+       schedule();
+       mem_cgroup_unmark_under_oom(memcg);
+       finish_wait(&memcg_oom_waitq, &owait.wait);
+
+       if (locked)
+               mem_cgroup_oom_unlock(memcg);
+cleanup:
+       current->memcg_in_oom = NULL;
+       css_put(&memcg->css);
+       return true;
+}
+
+
+bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked)
+{
+       /*
+        * We are in the middle of the charge context here, so we
+        * don't want to block when potentially sitting on a callstack
+        * that holds all kinds of filesystem and mm locks.
+        *
+        * cgroup1 allows disabling the OOM killer and waiting for outside
+        * handling until the charge can succeed; remember the context and put
+        * the task to sleep at the end of the page fault when all locks are
+        * released.
+        *
+        * On the other hand, in-kernel OOM killer allows for an async victim
+        * memory reclaim (oom_reaper) and that means that we are not solely
+        * relying on the oom victim to make a forward progress and we can
+        * invoke the oom killer here.
+        *
+        * Please note that mem_cgroup_out_of_memory might fail to find a
+        * victim and then we have to bail out from the charge path.
+        */
+       if (READ_ONCE(memcg->oom_kill_disable)) {
+               if (current->in_user_fault) {
+                       css_get(&memcg->css);
+                       current->memcg_in_oom = memcg;
+               }
+               return false;
+       }
+
+       mem_cgroup_mark_under_oom(memcg);
+
+       *locked = mem_cgroup_oom_trylock(memcg);
+
+       if (*locked)
+               mem_cgroup_oom_notify(memcg);
+
+       mem_cgroup_unmark_under_oom(memcg);
+
+       return true;
+}
+
+void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked)
+{
+       if (locked)
+               mem_cgroup_oom_unlock(memcg);
+}
+
 static int __init memcg1_init(void)
 {
        int node;
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index ef1b7037cbdcc7dfcbed6a613d825c633686acf8..3de956b2422f1bac1f44c3b5ebbb3d586bee72c8 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -87,9 +87,10 @@ enum res_type {
 bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
                                enum mem_cgroup_events_target target);
 unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap);
-void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 ssize_t memcg_write_event_control(struct kernfs_open_file *of,
                                  char *buf, size_t nbytes, loff_t off);
 
+bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked);
+void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked);
 
 #endif /* __MM_MEMCONTROL_V1_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71794161271ce0dbe00ba1de746752debf451bf6..4c37e7003f774361244f1a538bb2bc44dec4ea3f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1615,130 +1615,6 @@ unlock:
        return ret;
 }
 
-#ifdef CONFIG_LOCKDEP
-static struct lockdep_map memcg_oom_lock_dep_map = {
-       .name = "memcg_oom_lock",
-};
-#endif
-
-DEFINE_SPINLOCK(memcg_oom_lock);
-
-/*
- * Check OOM-Killer is already running under our hierarchy.
- * If someone is running, return false.
- */
-static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg)
-{
-       struct mem_cgroup *iter, *failed = NULL;
-
-       spin_lock(&memcg_oom_lock);
-
-       for_each_mem_cgroup_tree(iter, memcg) {
-               if (iter->oom_lock) {
-                       /*
-                        * this subtree of our hierarchy is already locked
-                        * so we cannot give a lock.
-                        */
-                       failed = iter;
-                       mem_cgroup_iter_break(memcg, iter);
-                       break;
-               } else
-                       iter->oom_lock = true;
-       }
-
-       if (failed) {
-               /*
-                * OK, we failed to lock the whole subtree so we have
-                * to clean up what we set up to the failing subtree
-                */
-               for_each_mem_cgroup_tree(iter, memcg) {
-                       if (iter == failed) {
-                               mem_cgroup_iter_break(memcg, iter);
-                               break;
-                       }
-                       iter->oom_lock = false;
-               }
-       } else
-               mutex_acquire(&memcg_oom_lock_dep_map, 0, 1, _RET_IP_);
-
-       spin_unlock(&memcg_oom_lock);
-
-       return !failed;
-}
-
-static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
-{
-       struct mem_cgroup *iter;
-
-       spin_lock(&memcg_oom_lock);
-       mutex_release(&memcg_oom_lock_dep_map, _RET_IP_);
-       for_each_mem_cgroup_tree(iter, memcg)
-               iter->oom_lock = false;
-       spin_unlock(&memcg_oom_lock);
-}
-
-static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
-{
-       struct mem_cgroup *iter;
-
-       spin_lock(&memcg_oom_lock);
-       for_each_mem_cgroup_tree(iter, memcg)
-               iter->under_oom++;
-       spin_unlock(&memcg_oom_lock);
-}
-
-static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
-{
-       struct mem_cgroup *iter;
-
-       /*
-        * Be careful about under_oom underflows because a child memcg
-        * could have been added after mem_cgroup_mark_under_oom.
-        */
-       spin_lock(&memcg_oom_lock);
-       for_each_mem_cgroup_tree(iter, memcg)
-               if (iter->under_oom > 0)
-                       iter->under_oom--;
-       spin_unlock(&memcg_oom_lock);
-}
-
-static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
-
-struct oom_wait_info {
-       struct mem_cgroup *memcg;
-       wait_queue_entry_t      wait;
-};
-
-static int memcg_oom_wake_function(wait_queue_entry_t *wait,
-       unsigned mode, int sync, void *arg)
-{
-       struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
-       struct mem_cgroup *oom_wait_memcg;
-       struct oom_wait_info *oom_wait_info;
-
-       oom_wait_info = container_of(wait, struct oom_wait_info, wait);
-       oom_wait_memcg = oom_wait_info->memcg;
-
-       if (!mem_cgroup_is_descendant(wake_memcg, oom_wait_memcg) &&
-           !mem_cgroup_is_descendant(oom_wait_memcg, wake_memcg))
-               return 0;
-       return autoremove_wake_function(wait, mode, sync, arg);
-}
-
-void memcg_oom_recover(struct mem_cgroup *memcg)
-{
-       /*
-        * For the following lockless ->under_oom test, the only required
-        * guarantee is that it must see the state asserted by an OOM when
-        * this function is called as a result of userland actions
-        * triggered by the notification of the OOM.  This is trivially
-        * achieved by invoking mem_cgroup_mark_under_oom() before
-        * triggering notification.
-        */
-       if (memcg && memcg->under_oom)
-               __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
-}
-
 /*
  * Returns true if successfully killed one or more processes. Though in some
  * corner cases it can return true even without killing any process.
@@ -1752,104 +1628,16 @@ static bool mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 
        memcg_memory_event(memcg, MEMCG_OOM);
 
-       /*
-        * We are in the middle of the charge context here, so we
-        * don't want to block when potentially sitting on a callstack
-        * that holds all kinds of filesystem and mm locks.
-        *
-        * cgroup1 allows disabling the OOM killer and waiting for outside
-        * handling until the charge can succeed; remember the context and put
-        * the task to sleep at the end of the page fault when all locks are
-        * released.
-        *
-        * On the other hand, in-kernel OOM killer allows for an async victim
-        * memory reclaim (oom_reaper) and that means that we are not solely
-        * relying on the oom victim to make a forward progress and we can
-        * invoke the oom killer here.
-        *
-        * Please note that mem_cgroup_out_of_memory might fail to find a
-        * victim and then we have to bail out from the charge path.
-        */
-       if (READ_ONCE(memcg->oom_kill_disable)) {
-               if (current->in_user_fault) {
-                       css_get(&memcg->css);
-                       current->memcg_in_oom = memcg;
-               }
+       if (!memcg1_oom_prepare(memcg, &locked))
                return false;
-       }
-
-       mem_cgroup_mark_under_oom(memcg);
 
-       locked = mem_cgroup_oom_trylock(memcg);
-
-       if (locked)
-               mem_cgroup_oom_notify(memcg);
-
-       mem_cgroup_unmark_under_oom(memcg);
        ret = mem_cgroup_out_of_memory(memcg, mask, order);
 
-       if (locked)
-               mem_cgroup_oom_unlock(memcg);
+       memcg1_oom_finish(memcg, locked);
 
        return ret;
 }
 
-/**
- * mem_cgroup_oom_synchronize - complete memcg OOM handling
- * @handle: actually kill/wait or just clean up the OOM state
- *
- * This has to be called at the end of a page fault if the memcg OOM
- * handler was enabled.
- *
- * Memcg supports userspace OOM handling where failed allocations must
- * sleep on a waitqueue until the userspace task resolves the
- * situation.  Sleeping directly in the charge context with all kinds
- * of locks held is not a good idea, instead we remember an OOM state
- * in the task and mem_cgroup_oom_synchronize() has to be called at
- * the end of the page fault to complete the OOM handling.
- *
- * Returns %true if an ongoing memcg OOM situation was detected and
- * completed, %false otherwise.
- */
-bool mem_cgroup_oom_synchronize(bool handle)
-{
-       struct mem_cgroup *memcg = current->memcg_in_oom;
-       struct oom_wait_info owait;
-       bool locked;
-
-       /* OOM is global, do not handle */
-       if (!memcg)
-               return false;
-
-       if (!handle)
-               goto cleanup;
-
-       owait.memcg = memcg;
-       owait.wait.flags = 0;
-       owait.wait.func = memcg_oom_wake_function;
-       owait.wait.private = current;
-       INIT_LIST_HEAD(&owait.wait.entry);
-
-       prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-       mem_cgroup_mark_under_oom(memcg);
-
-       locked = mem_cgroup_oom_trylock(memcg);
-
-       if (locked)
-               mem_cgroup_oom_notify(memcg);
-
-       schedule();
-       mem_cgroup_unmark_under_oom(memcg);
-       finish_wait(&memcg_oom_waitq, &owait.wait);
-
-       if (locked)
-               mem_cgroup_oom_unlock(memcg);
-cleanup:
-       current->memcg_in_oom = NULL;
-       css_put(&memcg->css);
-       return true;
-}
-
 /**
  * mem_cgroup_get_oom_group - get a memory cgroup to clean up after OOM
  * @victim: task to be killed by the OOM killer