mm/page_alloc: convert zone_pcp_update() to rely on memory barriers instead of stop_machine()
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c3edb624fccf30c303ccff94cf139e090b416d7e..8125263be60f98f848acd1ee375a3d8d3f199baa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -65,6 +65,9 @@
 #include <asm/div64.h>
 #include "internal.h"
 
+/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
+static DEFINE_MUTEX(pcp_batch_high_lock);
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -1179,10 +1182,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
        unsigned long flags;
        int to_drain;
+       unsigned long batch;
 
        local_irq_save(flags);
-       if (pcp->count >= pcp->batch)
-               to_drain = pcp->batch;
+       batch = ACCESS_ONCE(pcp->batch);
+       if (pcp->count >= batch)
+               to_drain = batch;
        else
                to_drain = pcp->count;
        if (to_drain > 0) {
@@ -1350,8 +1355,9 @@ void free_hot_cold_page(struct page *page, int cold)
                list_add(&page->lru, &pcp->lists[migratetype]);
        pcp->count++;
        if (pcp->count >= pcp->high) {
-               free_pcppages_bulk(zone, pcp->batch, pcp);
-               pcp->count -= pcp->batch;
+               unsigned long batch = ACCESS_ONCE(pcp->batch);
+               free_pcppages_bulk(zone, batch, pcp);
+               pcp->count -= batch;
        }
 
 out:
@@ -4032,6 +4038,39 @@ static int __meminit zone_batchsize(struct zone *zone)
 #endif
 }
 
+/*
+ * pcp->high and pcp->batch values are related and dependent on one another:
+ * ->batch must never be higher than ->high.
+ * The following function updates them in a safe manner without read-side
+ * locking.
+ *
+ * Any new users of pcp->batch and pcp->high should ensure they can cope with
+ * those fields changing asynchronously (according to the above rule).
+ *
+ * mutex_is_locked(&pcp_batch_high_lock) is required when calling this function
+ * outside of boot time (or some other assurance that no concurrent updaters
+ * exist).
+ */
+static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
+               unsigned long batch)
+{
+       /* start with a fail-safe value for batch */
+       pcp->batch = 1;
+       smp_wmb();
+
+       /* Update high, then batch, in order */
+       pcp->high = high;
+       smp_wmb();
+
+       pcp->batch = batch;
+}
+
+/* a companion to setup_pagelist_highmark() */
+static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch)
+{
+       pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch));
+}
+
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
        struct per_cpu_pages *pcp;
@@ -4041,8 +4080,7 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 
        pcp = &p->pcp;
        pcp->count = 0;
-       pcp->high = 6 * batch;
-       pcp->batch = max(1UL, 1 * batch);
+       pageset_set_batch(p, batch);
        for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
                INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
@@ -4051,17 +4089,14 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
  * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
  * to the value high for the pageset p.
  */
-
 static void setup_pagelist_highmark(struct per_cpu_pageset *p,
                                unsigned long high)
 {
-       struct per_cpu_pages *pcp;
+       unsigned long batch = max(1UL, high / 4);
+       if ((high / 4) > (PAGE_SHIFT * 8))
+               batch = PAGE_SHIFT * 8;
 
-       pcp = &p->pcp;
-       pcp->high = high;
-       pcp->batch = max(1UL, high/4);
-       if ((high/4) > (PAGE_SHIFT * 8))
-               pcp->batch = PAGE_SHIFT * 8;
+       pageset_update(&p->pcp, high, batch);
 }
 
 static void __meminit setup_zone_pageset(struct zone *zone)
@@ -5551,6 +5586,8 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
        ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (!write || (ret < 0))
                return ret;
+
+       mutex_lock(&pcp_batch_high_lock);
        for_each_populated_zone(zone) {
                for_each_possible_cpu(cpu) {
                        unsigned long  high;
@@ -5559,6 +5596,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
                                per_cpu_ptr(zone->pageset, cpu), high);
                }
        }
+       mutex_unlock(&pcp_batch_high_lock);
        return 0;
 }
 
@@ -6047,32 +6085,19 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static int __meminit __zone_pcp_update(void *data)
-{
-       struct zone *zone = data;
-       int cpu;
-       unsigned long batch = zone_batchsize(zone), flags;
-
-       for_each_possible_cpu(cpu) {
-               struct per_cpu_pageset *pset;
-               struct per_cpu_pages *pcp;
-
-               pset = per_cpu_ptr(zone->pageset, cpu);
-               pcp = &pset->pcp;
-
-               local_irq_save(flags);
-               if (pcp->count > 0)
-                       free_pcppages_bulk(zone, pcp->count, pcp);
-               drain_zonestat(zone, pset);
-               setup_pageset(pset, batch);
-               local_irq_restore(flags);
-       }
-       return 0;
-}
-
+/*
+ * The zone indicated has a new number of managed_pages; batch sizes and percpu
+ * page high values need to be recalculated.
+ */
 void __meminit zone_pcp_update(struct zone *zone)
 {
-       stop_machine(__zone_pcp_update, zone, NULL);
+       unsigned cpu;
+       unsigned long batch;
+       mutex_lock(&pcp_batch_high_lock);
+       batch = zone_batchsize(zone);
+       for_each_possible_cpu(cpu)
+               pageset_set_batch(per_cpu_ptr(zone->pageset, cpu), batch);
+       mutex_unlock(&pcp_batch_high_lock);
 }
 #endif
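
As an aside, here is a minimal userspace sketch of the pattern this patch relies on:
pageset_update()'s ordered writes on one side, and a one-shot ACCESS_ONCE()-style read of
pcp->batch on the other. It is an illustrative model using C11 atomics, not kernel code;
the names high, batch, pageset_update_model() and to_drain_model() are hypothetical
stand-ins for the fields and helpers touched above.

/* Standalone model of the pageset update protocol (illustration only). */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long high  = 6 * 31;	/* stands in for pcp->high  */
static _Atomic unsigned long batch = 31;	/* stands in for pcp->batch */

/* Writer side: mirrors pageset_update(); only one updater may run at a time. */
static void pageset_update_model(unsigned long new_high, unsigned long new_batch)
{
	/* park batch at a fail-safe value first */
	atomic_store_explicit(&batch, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);		/* models smp_wmb() */

	/* then move high ... */
	atomic_store_explicit(&high, new_high, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);		/* models smp_wmb() */

	/* ... and finally publish the real batch value */
	atomic_store_explicit(&batch, new_batch, memory_order_relaxed);
}

/* Reader side: mirrors drain_zone_pages(); take one snapshot of batch and use it. */
static unsigned long to_drain_model(unsigned long count)
{
	unsigned long b = atomic_load_explicit(&batch, memory_order_relaxed);

	return count >= b ? b : count;
}

int main(void)
{
	pageset_update_model(6 * 63, 63);
	printf("would drain %lu of 100 pages\n", to_drain_model(100));
	return 0;
}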