* Cache priorities range from BASE to MAX.
*
* For prefetch support, the top half of the range starts at
- * CACHE_PREFETCH_PRIORITY and everytime the buffer is fetched
- * and is at or above this priority level, it is reduced to
- * below this level (refer to libxfs_getbuf).
+ * CACHE_PREFETCH_PRIORITY and every time the buffer is fetched and is at or
+ * above this priority level, it is reduced to below this level (refer to
+ * libxfs_getbuf).
+ *
+ * If we have dirty nodes, we can't recycle them until they've been cleaned. To
+ * keep these out of the reclaimable lists (as there can be lots of them) give
+ * them their own priority that the shaker doesn't attempt to walk.
*/
#define CACHE_BASE_PRIORITY 0
#define CACHE_PREFETCH_PRIORITY 8
#define CACHE_MAX_PRIORITY 15
+#define CACHE_DIRTY_PRIORITY (CACHE_MAX_PRIORITY + 1)
+#define CACHE_NR_PRIORITIES CACHE_DIRTY_PRIORITY
/*
* Simple, generic implementation of a cache (arbitrary data).
unsigned int c_hashsize; /* hash bucket count */
unsigned int c_hashshift; /* hash key shift */
struct cache_hash *c_hash; /* hash table buckets */
- struct cache_mru c_mrus[CACHE_MAX_PRIORITY + 1];
+ struct cache_mru c_mrus[CACHE_DIRTY_PRIORITY + 1];
unsigned long long c_misses; /* cache misses */
unsigned long long c_hits; /* cache hits */
unsigned int c_max; /* max nodes ever used */
pthread_mutex_init(&cache->c_hash[i].ch_mutex, NULL);
}
- for (i = 0; i <= CACHE_MAX_PRIORITY; i++) {
+ for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) {
list_head_init(&cache->c_mrus[i].cm_list);
cache->c_mrus[i].cm_count = 0;
pthread_mutex_init(&cache->c_mrus[i].cm_mutex, NULL);
list_head_destroy(&cache->c_hash[i].ch_list);
pthread_mutex_destroy(&cache->c_hash[i].ch_mutex);
}
- for (i = 0; i <= CACHE_MAX_PRIORITY; i++) {
+ for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++) {
list_head_destroy(&cache->c_mrus[i].cm_list);
pthread_mutex_destroy(&cache->c_mrus[i].cm_mutex);
}
}
/*
- * We've hit the limit on cache size, so we need to start reclaiming
- * nodes we've used. The MRU specified by the priority is shaken.
- * Returns new priority at end of the call (in case we call again).
+ * Park unflushable nodes on their own special MRU so that cache_shake() doesn't
+ * end up repeatedly scanning them in the futile attempt to clean them before
+ * reclaim.
+ */
+static void
+cache_add_to_dirty_mru(
+ struct cache *cache,
+ struct cache_node *node)
+{
+ struct cache_mru *mru = &cache->c_mrus[CACHE_DIRTY_PRIORITY];
+
+ pthread_mutex_lock(&mru->cm_mutex);
+ node->cn_priority = CACHE_DIRTY_PRIORITY;
+ list_add(&node->cn_mru, &mru->cm_list);
+ mru->cm_count++;
+ pthread_mutex_unlock(&mru->cm_mutex);
+}
+
+/*
+ * We've hit the limit on cache size, so we need to start reclaiming nodes we've
+ * used. The MRU specified by the priority is shaken. Returns new priority at
+ * end of the call (in case we call again). We are not allowed to reclaim dirty
+ * objects, so we have to flush them first. If flushing fails, we move them to
+ * the "dirty, unreclaimable" list.
+ *
+ * Hence we skip priorities > CACHE_MAX_PRIORITY unless "purge" is set as we
+ * park unflushable (and hence unreclaimable) buffers at these priorities.
+ * Trying to shake unreclaimable buffer lists when there is memory pressure is a
+ * waste of time and CPU and greatly slows down cache node recycling operations.
+ * Hence we only try to free them if we are being asked to purge the cache of
+ * all entries.
*/
static unsigned int
cache_shake(
struct cache * cache,
unsigned int priority,
- int all)
+ bool purge)
{
struct cache_mru *mru;
struct cache_hash * hash;
struct cache_node * node;
unsigned int count;
- ASSERT(priority <= CACHE_MAX_PRIORITY);
- if (priority > CACHE_MAX_PRIORITY)
+ ASSERT(priority <= CACHE_DIRTY_PRIORITY);
+ if (priority > CACHE_MAX_PRIORITY && !purge)
priority = 0;
mru = &cache->c_mrus[priority];
if (pthread_mutex_trylock(&node->cn_mutex) != 0)
continue;
- /* can't release dirty objects */
- if (cache->flush(node)) {
+ /* memory pressure is not allowed to release dirty objects */
+ if (cache->flush(node) && !purge) {
+ list_del(&node->cn_mru);
+ mru->cm_count--;
+ node->cn_priority = -1;
pthread_mutex_unlock(&node->cn_mutex);
+ cache_add_to_dirty_mru(cache, node);
continue;
}
pthread_mutex_unlock(&node->cn_mutex);
count++;
- if (!all && count == CACHE_SHAKE_COUNT)
+ if (!purge && count == CACHE_SHAKE_COUNT)
break;
}
pthread_mutex_unlock(&mru->cm_mutex);
node = cache_node_allocate(cache, key);
if (node)
break;
- priority = cache_shake(cache, priority, 0);
+ priority = cache_shake(cache, priority, false);
/*
* We start at 0; if we free CACHE_SHAKE_COUNT we get
* back the same priority, if not we get back priority+1.
{
int i;
- for (i = 0; i <= CACHE_MAX_PRIORITY; i++)
- cache_shake(cache, i, 1);
+ for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++)
+ cache_shake(cache, i, true);
#ifdef CACHE_DEBUG
if (cache->c_count != 0) {
#define HASH_REPORT (3 * HASH_CACHE_RATIO)
void
cache_report(
- FILE *fp,
- const char *name,
- struct cache *cache)
+ FILE *fp,
+ const char *name,
+ struct cache *cache)
{
- int i;
- unsigned long count, index, total;
- unsigned long hash_bucket_lengths[HASH_REPORT + 2];
+ int i;
+ unsigned long count, index, total;
+ unsigned long hash_bucket_lengths[HASH_REPORT + 2];
if ((cache->c_hits + cache->c_misses) == 0)
return;
i, cache->c_mrus[i].cm_count,
cache->c_mrus[i].cm_count * 100 / cache->c_count);
+ i = CACHE_DIRTY_PRIORITY;
+ fprintf(fp, "Dirty MRU %d entries = %6u (%3u%%)\n",
+ i, cache->c_mrus[i].cm_count,
+ cache->c_mrus[i].cm_count * 100 / cache->c_count);
+
/* report hash bucket lengths */
bzero(hash_bucket_lengths, sizeof(hash_bucket_lengths));