};
#endif
- node n;
+ #define EP_POS_MAX ((page_size - OFFSETOF(struct empty_pages, pages)) / sizeof (void *))
+
+ struct empty_pages {
-struct free_pages {
- list pages; /* List of (struct free_page) keeping free pages without releasing them (hot) */
- list empty; /* List of (struct empty_pages) keeping invalidated pages mapped for us (cold) */
- u16 min, max; /* Minimal and maximal number of free pages kept */
- uint cnt; /* Number of free pages in list */
- event cleanup;
-};
++ struct empty_pages *next;
+ uint pos;
+ void *pages[0];
+ };
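+ /* For illustration, assuming 4K pages and 8-byte pointers (a common 64-bit
+  * build): OFFSETOF(struct empty_pages, pages) is 16, so one keeper page
+  * holds EP_POS_MAX = (4096 - 16) / 8 = 510 cold page pointers. */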
+
-static void global_free_pages_cleanup_event(void *);
++DEFINE_DOMAIN(resource);
++static DOMAIN(resource) empty_pages_domain;
++static struct empty_pages *empty_pages = NULL;
+
+static struct free_page * _Atomic page_stack = NULL;
+static _Thread_local struct free_page * local_page_stack = NULL;
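+
+ /* Free pages are kept on two single-linked stacks: a per-thread hot cache
+  * (local_page_stack, touched only by its owning thread) and a global one
+  * (page_stack, updated lock-free by CAS under the RCU read lock). */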
-static struct free_pages global_free_pages = {
- .min = KEEP_PAGES_MAIN_MIN,
- .max = KEEP_PAGES_MAIN_MAX,
- .cleanup = { .hook = global_free_pages_cleanup_event },
-};
+static void page_cleanup(void *);
+static event page_cleanup_event = { .hook = page_cleanup, };
+#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0)
-uint *pages_kept = &global_free_pages.cnt;
+_Atomic int pages_kept = 0;
+_Atomic int pages_kept_locally = 0;
+static int pages_kept_here = 0;
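+
+ /* Accounting: pages_kept counts pages on the global hot stack,
+  * pages_kept_locally sums all threads' local stacks, and pages_kept_here
+  * is the calling thread's own share of that sum. */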
static void *
alloc_sys_page(void)
}
#ifdef HAVE_MMAP
- struct free_pages *fps = &global_free_pages;
-
- /* If there is any free page kept hot, we use it. */
- if (fps->cnt)
++ /* If there is any free page kept hot in this thread, we use it. */
+ struct free_page *fp = local_page_stack;
+ if (fp)
{
- struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages));
- rem_node(&fp->n);
+ local_page_stack = atomic_load_explicit(&fp->next, memory_order_acquire);
+ atomic_fetch_sub_explicit(&pages_kept_locally, 1, memory_order_relaxed);
+ pages_kept_here--;
+ return fp;
+ }
- /* If the hot-free-page cache is getting short, request the cleanup routine to replenish the cache */
- if ((--fps->cnt < fps->min) && !shutting_down)
- ev_schedule(&fps->cleanup);
++ /* If there is any free page kept hot in global storage, we use it. */
+ rcu_read_lock();
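+ /* The RCU read lock makes the pop safe: page_cleanup() calls
+  * synchronize_rcu() before repurposing a popped page, so reading fp->next
+  * here cannot touch freed memory even if fp is popped concurrently. */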
+ fp = atomic_load_explicit(&page_stack, memory_order_acquire);
+ while (fp && !atomic_compare_exchange_strong_explicit(
+ &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire),
+ memory_order_acq_rel, memory_order_acquire))
+ ;
+ rcu_read_unlock();
- if (!fp)
++ if (fp)
+ {
- void *ptr = alloc_sys_page();
- for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++)
- free_page(ptr + page_size * i);
- return ptr;
++ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
+ return fp;
+ }
+
+ /* If there is any free page kept cold, we use that. */
- if (!EMPTY_LIST(fps->empty))
- {
- struct empty_pages *ep = HEAD(fps->empty);
++ LOCK_DOMAIN(resource, empty_pages_domain);
++ if (empty_pages) {
++ if (empty_pages->pos)
++ /* Either the keeper page contains at least one cold page pointer, return that */
++ fp = empty_pages->pages[--empty_pages->pos];
++ else
++ {
++ /* Or the keeper page has no more cold page pointer, return the keeper page */
++ fp = (struct free_page *) empty_pages;
++ empty_pages = empty_pages->next;
++ }
+ }
++ UNLOCK_DOMAIN(resource, empty_pages_domain);
- atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
- return fp;
- /* Either the keeper page contains at least one cold page pointer, return that */
- if (ep->pos)
- return ep->pages[--ep->pos];
++ if (fp)
++ return fp;
+
- /* Or the keeper page has no more cold page pointer, return the keeper page */
- rem_node(&ep->n);
- return ep;
- }
++ /* And in the worst case, allocate some new pages by mmap() */
++ void *ptr = alloc_sys_page();
++ for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++)
++ free_page(ptr + page_size * i);
+
- /* And in the worst case, allocate a new page by mmap() */
- return alloc_sys_page();
++ return ptr;
#endif
}
}
#ifdef HAVE_MMAP
- struct free_pages *fps = &global_free_pages;
++ /* We primarily try to keep the pages locally. */
struct free_page *fp = ptr;
+ if (shutting_down || (pages_kept_here < KEEP_PAGES_MAX_LOCAL))
+ {
+ atomic_store_explicit(&fp->next, local_page_stack, memory_order_relaxed);
+ local_page_stack = fp;
+ atomic_fetch_add_explicit(&pages_kept_locally, 1, memory_order_relaxed);
+ pages_kept_here++;
+ return;
+ }
+
++ /* If there are too many local pages, we add the free page to the global hot-free-page list */
+ rcu_read_lock();
+ struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire);
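+ /* On a failed CAS, `next` is refreshed to the current head, so fp->next
+  * gets re-linked before every retry. */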
- /* Otherwise, we add the free page to the hot-free-page list */
- fp->n = (node) {};
- add_tail(&fps->pages, &fp->n);
+ do atomic_store_explicit(&fp->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, fp,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
- /* And if there are too many hot free pages, we ask for page cleanup */
- if ((++fps->cnt > fps->max) && !shutting_down)
- ev_schedule(&fps->cleanup);
++ /* And if there are too many global hot free pages, we ask for page cleanup */
+ if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
#endif
}
++/* When the routine is going to sleep for a long time, we flush the local
++ * hot page cache so that we don't keep dirty pages around for nothing. */
+void
+flush_local_pages(void)
+{
+ if (use_fake || !local_page_stack || shutting_down)
+ return;
+
++ /* We first count the pages to enable consistency checking.
++ * Also, we need to know the last page. */
+ struct free_page *last = local_page_stack, *next;
+ int check_count = 1;
+ while ((next = atomic_load_explicit(&last->next, memory_order_acquire)))
+ {
+ check_count++;
+ last = next;
+ }
+
++ /* The actual number of pages must be equal to the counter value. */
+ ASSERT_DIE(check_count == pages_kept_here);
+
++ /* We repeatedly try to insert the whole page list into the global page stack at once. */
+ rcu_read_lock();
+ next = atomic_load_explicit(&page_stack, memory_order_acquire);
+
++ /* First we set the outwards pointer (from our last),
++ * then we try to set the inwards pointer to our first page. */
+ do atomic_store_explicit(&last->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, local_page_stack,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
+
++ /* Finished. Now the local stack is empty. */
+ local_page_stack = NULL;
+ pages_kept_here = 0;
+
++ /* Check the state of global page cache and maybe schedule its cleanup. */
+ atomic_fetch_sub_explicit(&pages_kept_locally, check_count, memory_order_relaxed);
+ if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
+}
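+
+/* Usage note: callers are expected to invoke flush_local_pages() right
+ * before a long block, e.g. before sleeping in poll(); the call sites are
+ * elsewhere in the tree and not part of this patch. */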
+
#ifdef HAVE_MMAP
static void
-global_free_pages_cleanup_event(void *data UNUSED)
+page_cleanup(void *_ UNUSED)
{
+ /* Cleanup on shutdown is ignored. All pages may be kept hot, OS will take care. */
if (shutting_down)
return;
- struct free_pages *fps = &global_free_pages;
+ struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel);
+ if (!stack)
+ return;
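+
+ /* The exchange detaches the whole hot stack in one atomic step; concurrent
+  * free_page() calls keep pushing onto a fresh, empty stack meanwhile. */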
- synchronize_rcu();
- /* Cleanup may get called when hot free page cache is short of pages. Replenishing. */
- while (fps->cnt / 2 < fps->min)
- {
- struct free_page *fp = alloc_sys_page();
- fp->n = (node) {};
- add_tail(&fps->pages, &fp->n);
- fps->cnt++;
- }
++ /* Cleanup gets called when hot free page cache is too big.
++ * Moving some pages to the cold free page cache. */
- /* Or the hot free page cache is too big. Moving some pages to the cold free page cache. */
- for (int limit = CLEANUP_PAGES_BULK; limit && (fps->cnt > fps->max / 2); fps->cnt--, limit--)
- {
- struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
- rem_node(&fp->n);
+ do {
- struct free_page *f = stack;
- stack = atomic_load_explicit(&f->next, memory_order_acquire);
-
- if (munmap(f, page_size) == 0)
- continue;
- else if (errno != ENOMEM)
- bug("munmap(%p) failed: %m", f);
++ synchronize_rcu();
++ struct free_page *fp = stack;
++ stack = atomic_load_explicit(&fp->next, memory_order_acquire);
+
++ LOCK_DOMAIN(resource, empty_pages_domain);
+ /* Empty pages are stored as pointers. To store them, we need a pointer block. */
- struct empty_pages *ep;
- if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX))
++ if (!empty_pages || (empty_pages->pos == EP_POS_MAX))
+ {
+ /* There is either no pointer block or the last block is full. We use this block as a pointer block. */
- ep = (struct empty_pages *) fp;
- *ep = (struct empty_pages) {};
- add_head(&fps->empty, &ep->n);
++ struct empty_pages *ep = (struct empty_pages *) fp;
++ *ep = (struct empty_pages) { .next = empty_pages, };
++ empty_pages = ep;
+ }
else
- free_page(f);
+ {
+ /* We store this block as a pointer into the first free place
+ * and tell the OS that the underlying memory is trash. */
- ep->pages[ep->pos++] = fp;
++ empty_pages->pages[empty_pages->pos++] = fp;
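+ /* MADV_FREE lets the kernel reclaim the page lazily; where it is missing,
+  * the build presumably sets CONFIG_MADV_DONTNEED_TO_FREE and we fall back
+  * to MADV_DONTNEED, which drops the contents immediately. */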
+ if (madvise(fp, page_size,
+ #ifdef CONFIG_MADV_DONTNEED_TO_FREE
+ MADV_DONTNEED
+ #else
+ MADV_FREE
+ #endif
+ ) < 0)
+ bug("madvise(%p) failed: %m", fp);
+ }
++ UNLOCK_DOMAIN(resource, empty_pages_domain);
}
+ while ((atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2) && stack);
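+
+ /* Stopping at KEEP_PAGES_MAX / 2 gives hysteresis: the hot stack is trimmed
+  * to half the trigger level, so a workload hovering around the limit does
+  * not schedule a cleanup after every single free_page(). */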
- /* If the hot free page cleanup hit the limit, re-schedule this routine
- * to allow for other routines to run. */
- if (fps->cnt > fps->max)
- ev_schedule(&fps->cleanup);
+ while (stack)
+ {
+ struct free_page *f = stack;
+ stack = atomic_load_explicit(&f->next, memory_order_acquire);
+ free_page(f);
+
+ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
+ }
}
#endif
resource_sys_init(void)
{
#ifdef HAVE_MMAP
- ASSERT_DIE(global_free_pages.cnt == 0);
-
+ /* Check what page size the system supports */
if (!(page_size = sysconf(_SC_PAGESIZE)))
die("System page size must be non-zero");
- if (u64_popcount(page_size) == 1)
+ if ((u64_popcount(page_size) == 1) && (page_size >= (1 << 10)) && (page_size <= (1 << 18)))
{
+ /* We assume that the page size has only one bit set and is between 1K and
+  * 256K (incl.). Otherwise, the assumptions in lib/slab.c (sl_head's num_full
+  * range) aren't met. */
- for (int i = 0; i < (KEEP_PAGES_MIN * 2); i++)
- free_page(alloc_page());
- struct free_pages *fps = &global_free_pages;
--
- page_cleanup(NULL);
- init_list(&fps->pages);
- init_list(&fps->empty);
- global_free_pages_cleanup_event(NULL);
++ empty_pages_domain = DOMAIN_NEW(resource, "Empty Pages");
+ initialized = 1;
return;
}