diff --git a/mm/vmscan.c b/mm/vmscan.c
index e979705bbf325531b0cf2d90d26660fe308ea310..a815f73ee4d5b2d1a9872cca19db055845499aa1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -374,7 +374,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
  */
 int prealloc_shrinker(struct shrinker *shrinker)
 {
-       size_t size = sizeof(*shrinker->nr_deferred);
+       unsigned int size = sizeof(*shrinker->nr_deferred);
 
        if (shrinker->flags & SHRINKER_NUMA_AWARE)
                size *= nr_node_ids;
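This first hunk only narrows the type used for the allocation size. For orientation, a condensed sketch of what prealloc_shrinker() is sizing here, assuming the rest of the function matches this tree: one atomic_long_t deferred-work counter, or one per node for NUMA-aware shrinkers, so an unsigned int is plenty.

	unsigned int size = sizeof(*shrinker->nr_deferred);	/* sizeof(atomic_long_t) */

	if (shrinker->flags & SHRINKER_NUMA_AWARE)
		size *= nr_node_ids;

	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
	if (!shrinker->nr_deferred)
		return -ENOMEM;
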
@@ -952,7 +952,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 */
                if (reclaimed && page_is_file_cache(page) &&
                    !mapping_exiting(mapping) && !dax_mapping(mapping))
-                       shadow = workingset_eviction(mapping, page);
+                       shadow = workingset_eviction(page);
                __delete_from_page_cache(page, shadow);
                xa_unlock_irqrestore(&mapping->i_pages, flags);
 
@@ -1106,16 +1106,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 {
        LIST_HEAD(ret_pages);
        LIST_HEAD(free_pages);
-       int pgactivate = 0;
-       unsigned nr_unqueued_dirty = 0;
-       unsigned nr_dirty = 0;
-       unsigned nr_congested = 0;
        unsigned nr_reclaimed = 0;
-       unsigned nr_writeback = 0;
-       unsigned nr_immediate = 0;
-       unsigned nr_ref_keep = 0;
-       unsigned nr_unmap_fail = 0;
 
+       memset(stat, 0, sizeof(*stat));
        cond_resched();
 
        while (!list_empty(page_list)) {
@@ -1159,10 +1152,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 */
                page_check_dirty_writeback(page, &dirty, &writeback);
                if (dirty || writeback)
-                       nr_dirty++;
+                       stat->nr_dirty++;
 
                if (dirty && !writeback)
-                       nr_unqueued_dirty++;
+                       stat->nr_unqueued_dirty++;
 
                /*
                 * Treat this page as congested if the underlying BDI is or if
@@ -1174,7 +1167,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                if (((dirty || writeback) && mapping &&
                     inode_write_congested(mapping->host)) ||
                    (writeback && PageReclaim(page)))
-                       nr_congested++;
+                       stat->nr_congested++;
 
                /*
                 * If a page at the tail of the LRU is under writeback, there
@@ -1223,7 +1216,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        if (current_is_kswapd() &&
                            PageReclaim(page) &&
                            test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
-                               nr_immediate++;
+                               stat->nr_immediate++;
                                goto activate_locked;
 
                        /* Case 2 above */
@@ -1241,7 +1234,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                 * and it's also appropriate in global reclaim.
                                 */
                                SetPageReclaim(page);
-                               nr_writeback++;
+                               stat->nr_writeback++;
                                goto activate_locked;
 
                        /* Case 3 above */
@@ -1261,7 +1254,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                case PAGEREF_ACTIVATE:
                        goto activate_locked;
                case PAGEREF_KEEP:
-                       nr_ref_keep++;
+                       stat->nr_ref_keep++;
                        goto keep_locked;
                case PAGEREF_RECLAIM:
                case PAGEREF_RECLAIM_CLEAN:
@@ -1326,7 +1319,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        if (unlikely(PageTransHuge(page)))
                                flags |= TTU_SPLIT_HUGE_PMD;
                        if (!try_to_unmap(page, flags)) {
-                               nr_unmap_fail++;
+                               stat->nr_unmap_fail++;
                                goto activate_locked;
                        }
                }
@@ -1474,7 +1467,7 @@ activate_locked:
                VM_BUG_ON_PAGE(PageActive(page), page);
                if (!PageMlocked(page)) {
                        SetPageActive(page);
-                       pgactivate++;
+                       stat->nr_activate++;
                        count_memcg_page_event(page, PGACTIVATE);
                }
 keep_locked:
@@ -1489,18 +1482,8 @@ keep:
        free_unref_page_list(&free_pages);
 
        list_splice(&ret_pages, page_list);
-       count_vm_events(PGACTIVATE, pgactivate);
-
-       if (stat) {
-               stat->nr_dirty = nr_dirty;
-               stat->nr_congested = nr_congested;
-               stat->nr_unqueued_dirty = nr_unqueued_dirty;
-               stat->nr_writeback = nr_writeback;
-               stat->nr_immediate = nr_immediate;
-               stat->nr_activate = pgactivate;
-               stat->nr_ref_keep = nr_ref_keep;
-               stat->nr_unmap_fail = nr_unmap_fail;
-       }
+       count_vm_events(PGACTIVATE, stat->nr_activate);
+
        return nr_reclaimed;
 }
 
@@ -1512,6 +1495,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
                .priority = DEF_PRIORITY,
                .may_unmap = 1,
        };
+       struct reclaim_stat dummy_stat;
        unsigned long ret;
        struct page *page, *next;
        LIST_HEAD(clean_pages);
@@ -1525,7 +1509,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
        }
 
        ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
-                       TTU_IGNORE_ACCESS, NULL, true);
+                       TTU_IGNORE_ACCESS, &dummy_stat, true);
        list_splice(&clean_pages, page_list);
        mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret);
        return ret;
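The shrink_page_list() hunks above replace the eight local counters with direct updates to the caller-supplied struct reclaim_stat, which becomes mandatory: the function now zeroes it with memset() instead of checking for NULL, and a caller that does not care, reclaim_clean_pages_from_list(), passes a throwaway dummy_stat. A condensed sketch of the resulting calling convention (variable names borrowed from shrink_inactive_list() in this tree):

	struct reclaim_stat stat;	/* no '= {}' needed: the callee zeroes it */
	unsigned long nr_reclaimed;

	nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, 0, &stat, false);

	/* Every field is now always valid, e.g. stat.nr_dirty, stat.nr_congested,
	 * stat.nr_writeback, stat.nr_activate (used for the PGACTIVATE count). */
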
@@ -1630,8 +1614,8 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 
 }
 
-/*
- * zone_lru_lock is heavily contended.  Some of the functions that
+/**
+ * pgdat->lru_lock is heavily contended.  Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
  * and working on them outside the LRU lock.
  *
@@ -1653,7 +1637,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                struct lruvec *lruvec, struct list_head *dst,
                unsigned long *nr_scanned, struct scan_control *sc,
-               isolate_mode_t mode, enum lru_list lru)
+               enum lru_list lru)
 {
        struct list_head *src = &lruvec->lists[lru];
        unsigned long nr_taken = 0;
@@ -1662,6 +1646,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
        unsigned long skipped = 0;
        unsigned long scan, total_scan, nr_pages;
        LIST_HEAD(pages_skipped);
+       isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
 
        scan = 0;
        for (total_scan = 0;
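Here the isolate_mode_t argument is dropped: isolate_lru_pages() derives the mode from sc->may_unmap itself, so the two callers, shrink_inactive_list() and shrink_active_list() in the hunks further down, no longer build it. A condensed before/after of the call site:

	/* Before: each caller computed the mode. */
	isolate_mode_t isolate_mode = 0;

	if (!sc->may_unmap)
		isolate_mode |= ISOLATE_UNMAPPED;
	nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
				     &nr_scanned, sc, isolate_mode, lru);

	/* After: the callee reads sc->may_unmap directly. */
	nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
				     &nr_scanned, sc, lru);
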
@@ -1765,11 +1750,11 @@ int isolate_lru_page(struct page *page)
        WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
        if (PageLRU(page)) {
-               struct zone *zone = page_zone(page);
+               pg_data_t *pgdat = page_pgdat(page);
                struct lruvec *lruvec;
 
-               spin_lock_irq(zone_lru_lock(zone));
-               lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+               spin_lock_irq(&pgdat->lru_lock);
+               lruvec = mem_cgroup_page_lruvec(page, pgdat);
                if (PageLRU(page)) {
                        int lru = page_lru(page);
                        get_page(page);
@@ -1777,7 +1762,7 @@ int isolate_lru_page(struct page *page)
                        del_page_from_lru_list(page, lruvec, lru);
                        ret = 0;
                }
-               spin_unlock_irq(zone_lru_lock(zone));
+               spin_unlock_irq(&pgdat->lru_lock);
        }
        return ret;
 }
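isolate_lru_page() (and the comments in the following hunks) switch from the zone_lru_lock() wrapper to taking pgdat->lru_lock directly. The wrapper resolved to &zone->zone_pgdat->lru_lock, so the lock itself is unchanged; it is just named through the node. The resulting pattern, sketched:

	pg_data_t *pgdat = page_pgdat(page);

	spin_lock_irq(&pgdat->lru_lock);
	/* ... work on the per-node LRU lists via mem_cgroup_page_lruvec(page, pgdat) ... */
	spin_unlock_irq(&pgdat->lru_lock);
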
@@ -1899,8 +1884,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
        unsigned long nr_scanned;
        unsigned long nr_reclaimed = 0;
        unsigned long nr_taken;
-       struct reclaim_stat stat = {};
-       isolate_mode_t isolate_mode = 0;
+       struct reclaim_stat stat;
        int file = is_file_lru(lru);
        struct pglist_data *pgdat = lruvec_pgdat(lruvec);
        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
@@ -1921,13 +1905,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
        lru_add_drain();
 
-       if (!sc->may_unmap)
-               isolate_mode |= ISOLATE_UNMAPPED;
-
        spin_lock_irq(&pgdat->lru_lock);
 
        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
-                                    &nr_scanned, sc, isolate_mode, lru);
+                                    &nr_scanned, sc, lru);
 
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
        reclaim_stat->recent_scanned[file] += nr_taken;
@@ -2009,9 +1990,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
  * processes, from rmap.
  *
  * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone_lru_lock across the whole operation.  But if
+ * appropriate to hold pgdat->lru_lock across the whole operation.  But if
  * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone_lru_lock around each page.  It's impossible to balance
+ * should drop pgdat->lru_lock around each page.  It's impossible to balance
  * this, so instead we remove the pages from the LRU while processing them.
  * It is safe to rely on PG_active against the non-LRU pages in here because
  * nobody will play with that bit on a non-LRU page.
@@ -2084,19 +2065,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
        unsigned nr_deactivate, nr_activate;
        unsigned nr_rotated = 0;
-       isolate_mode_t isolate_mode = 0;
        int file = is_file_lru(lru);
        struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
        lru_add_drain();
 
-       if (!sc->may_unmap)
-               isolate_mode |= ISOLATE_UNMAPPED;
-
        spin_lock_irq(&pgdat->lru_lock);
 
        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
-                                    &nr_scanned, sc, isolate_mode, lru);
+                                    &nr_scanned, sc, lru);
 
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
        reclaim_stat->recent_scanned[file] += nr_taken;
@@ -2199,7 +2176,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
  *   10TB     320        32GB
  */
 static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
-                                struct mem_cgroup *memcg,
                                 struct scan_control *sc, bool actual_reclaim)
 {
        enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
@@ -2220,16 +2196,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
        inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
        active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
 
-       if (memcg)
-               refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
-       else
-               refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
        /*
         * When refaults are being observed, it means a new workingset
         * is being established. Disable active list protection to get
         * rid of the stale workingset quickly.
         */
+       refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
        if (file && actual_reclaim && lruvec->refaults != refaults) {
                inactive_ratio = 0;
        } else {
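In inactive_list_is_low() (and in snapshot_refaults() further down) the WORKINGSET_ACTIVATE refault count is now read through lruvec_page_state(), which hides the memcg-versus-global distinction behind the lruvec. That is what allows the struct mem_cgroup * parameter to be dropped from inactive_list_is_low(), shrink_list() and their callers in the later hunks. The before/after, condensed:

	/* Before: the caller had to know whether this was cgroup reclaim. */
	if (memcg)
		refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
	else
		refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);

	/* After: the lruvec already identifies "this memcg on this node"
	 * (or the whole node when cgroups are not involved). */
	refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
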
@@ -2250,12 +2222,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-                                struct lruvec *lruvec, struct mem_cgroup *memcg,
-                                struct scan_control *sc)
+                                struct lruvec *lruvec, struct scan_control *sc)
 {
        if (is_active_lru(lru)) {
-               if (inactive_list_is_low(lruvec, is_file_lru(lru),
-                                        memcg, sc, true))
+               if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
                        shrink_active_list(nr_to_scan, lruvec, sc, lru);
                return 0;
        }
@@ -2355,7 +2325,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
                         * anonymous pages on the LRU in eligible zones.
                         * Otherwise, the small LRU gets thrashed.
                         */
-                       if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
+                       if (!inactive_list_is_low(lruvec, false, sc, false) &&
                            lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
                                        >> sc->priority) {
                                scan_balance = SCAN_ANON;
@@ -2373,7 +2343,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
         * lruvec even if it has plenty of old anonymous pages unless the
         * system is under heavy pressure.
         */
-       if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
+       if (!inactive_list_is_low(lruvec, true, sc, false) &&
            lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
                scan_balance = SCAN_FILE;
                goto out;
@@ -2526,7 +2496,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
                                nr[lru] -= nr_to_scan;
 
                                nr_reclaimed += shrink_list(lru, nr_to_scan,
-                                                           lruvec, memcg, sc);
+                                                           lruvec, sc);
                        }
                }
 
@@ -2593,7 +2563,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
         * Even if we did not try to evict anon pages at all, we want to
         * rebalance the anon lru active/inactive ratio.
         */
-       if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+       if (inactive_list_is_low(lruvec, false, sc, true))
                shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
                                   sc, LRU_ACTIVE_ANON);
 }
@@ -2754,16 +2724,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
                                   sc->nr_reclaimed - reclaimed);
 
                        /*
-                        * Direct reclaim and kswapd have to scan all memory
-                        * cgroups to fulfill the overall scan target for the
-                        * node.
+                        * Kswapd has to scan all memory cgroups to fulfill
+                        * the overall scan target for the node.
                         *
                         * Limit reclaim, on the other hand, only cares about
                         * nr_to_reclaim pages to be reclaimed and it will
                         * retry with decreasing priority if one round over the
                         * whole hierarchy is not sufficient.
                         */
-                       if (!global_reclaim(sc) &&
+                       if (!current_is_kswapd() &&
                                        sc->nr_reclaimed >= sc->nr_to_reclaim) {
                                mem_cgroup_iter_break(root, memcg);
                                break;
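The early-exit test in shrink_node()'s memcg walk changes from !global_reclaim(sc) to !current_is_kswapd(): direct reclaim may now also stop iterating cgroups once sc->nr_to_reclaim is met, and only kswapd keeps scanning every cgroup to reach the node-wide target, as the reworded comment states. A simplified sketch of the loop (the real code passes a reclaim cookie to mem_cgroup_iter() and does more work per cgroup):

	memcg = mem_cgroup_iter(root, NULL, NULL);
	do {
		/* ... shrink_node_memcg() and shrink_slab() for this cgroup ... */

		if (!current_is_kswapd() &&
		    sc->nr_reclaimed >= sc->nr_to_reclaim) {
			mem_cgroup_iter_break(root, memcg);
			break;
		}
	} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
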
@@ -2993,12 +2962,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
                unsigned long refaults;
                struct lruvec *lruvec;
 
-               if (memcg)
-                       refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
-               else
-                       refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
                lruvec = mem_cgroup_lruvec(pgdat, memcg);
+               refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
                lruvec->refaults = refaults;
        } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
 }
@@ -3363,7 +3328,7 @@ static void age_active_anon(struct pglist_data *pgdat,
        do {
                struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
 
-               if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+               if (inactive_list_is_low(lruvec, false, sc, true))
                        shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
                                           sc, LRU_ACTIVE_ANON);
 
@@ -3527,7 +3492,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
  *
  * kswapd scans the zones in the highmem->normal->dma direction.  It skips
  * zones which have free_pages > high_wmark_pages(zone), but once a zone is
- * found to have free_pages <= high_wmark_pages(zone), any page is that zone
+ * found to have free_pages <= high_wmark_pages(zone), any page in that zone
  * or lower is eligible for reclaim until at least one usable zone is
  * balanced.
  */