lib/cache: handle MDB_READERS_FULL

author Vladimír Čunát <vladimir.cunat@nic.cz>

Wed, 10 Jul 2019 10:34:11 +0000 (12:34 +0200)

committer Petr Špaček <petr.spacek@nic.cz>

Wed, 10 Jul 2019 11:54:59 +0000 (13:54 +0200)
author Vladimír Čunát <vladimir.cunat@nic.cz>
Wed, 10 Jul 2019 10:34:11 +0000 (12:34 +0200)
committer Petr Špaček <petr.spacek@nic.cz>
Wed, 10 Jul 2019 11:54:59 +0000 (13:54 +0200)
diff --git a/NEWS b/NEWS

index 1a6d7a86e611bfa4f10bcc86b41920ec39a69268..dcf525adb31a8abd2f5e113c6e2262973d9187bf 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -27,6 +27,7 @@ Bugfixes
  - rebinding module: avoid excessive iteration on blocked attempts (!842)
  - rebinding module: fix crash caused by race condition (!842)
  - rebinding module: log each blocked query only in verbose mode (!842)
+- cache: automatically clear stale reader locks (!844)
  
  
  Knot Resolver 4.0.0 (2019-04-18)
diff --git a/lib/cache/cdb_lmdb.c b/lib/cache/cdb_lmdb.c

index db929592e8156db71f8ded3074e681b772a3de83..48129635826116104695e9455ec6b27d26f88f6d 100644 (file)
--- a/lib/cache/cdb_lmdb.c
+++ b/lib/cache/cdb_lmdb.c
@@ -25,6 +25,7 @@
  #include <lmdb.h>
  
  #include "contrib/cleanup.h"
+#include "contrib/ucw/lib.h"
  #include "lib/cache/cdb_lmdb.h"
  #include "lib/cache/cdb_api.h"
  #include "lib/cache/api.h"
@@ -110,36 +111,45 @@ static int set_mapsize(MDB_env *env, size_t map_size)
  }
  
  #define FLAG_RENEW (2*MDB_RDONLY)
-/** mdb_txn_begin or _renew + handle MDB_MAP_RESIZED.
+/** mdb_txn_begin or _renew, retried after MDB_MAP_RESIZED or MDB_READERS_FULL.
   *
- * The retrying logic for MDB_MAP_RESIZED is so ugly that it has its own function.
+ * The retrying logic is so ugly that it has its own function.
   * \note this assumes no transactions are active
   * \return MDB_ errcode, not usual kr_error(...)
   */
  static int txn_get_noresize(struct lmdb_env *env, unsigned int flag, MDB_txn **txn)
  {
         assert(!env->txn.rw && (!env->txn.ro || !env->txn.ro_active));
+       int attempts = 0;
         int ret;
-       if (flag == FLAG_RENEW) {
-               ret = mdb_txn_renew(*txn);
-       } else {
-               ret = mdb_txn_begin(env->env, NULL, flag, txn);
-       }
-       if (ret != MDB_MAP_RESIZED) {
-               return ret;
-       }
-       //:unlikely
-       /* Another process increased the size; let's try to recover. */
-       kr_log_info("[cache] detected size increased by another process\n");
-       ret = mdb_env_set_mapsize(env->env, 0);
-       if (ret != MDB_SUCCESS) {
-               return ret;
+retry:
+       /* Do a few attempts in case we encounter multiple issues at once; at most two begin/renew calls are made. */
+       if (++attempts > 2) {
+               return kr_error(1); /* NOTE(review): a kr_error() value although \return promises an MDB_ errcode; the last LMDB error is dropped - confirm callers cope */
         }
+
         if (flag == FLAG_RENEW) {
                 ret = mdb_txn_renew(*txn);
         } else {
                 ret = mdb_txn_begin(env->env, NULL, flag, txn);
         }
+
+       if (unlikely(ret == MDB_MAP_RESIZED)) {
+               kr_log_info("[cache] detected size increased by another process\n");
+               ret = mdb_env_set_mapsize(env->env, 0); /* size 0: presumably adopts the size set by the other process - verify against LMDB docs */
+               if (ret == MDB_SUCCESS) {
+                       goto retry;
+               } /* otherwise fall through and return the mdb_env_set_mapsize() error */
+       } else if (unlikely(ret == MDB_READERS_FULL)) {
+               int cleared;
+               ret = mdb_reader_check(env->env, &cleared); /* on success, cleared holds the number of stale reader locks removed (logged below) */
+               if (ret == MDB_SUCCESS)
+                       kr_log_info("[cache] cleared %d stale reader locks\n", cleared);
+               else
+                       kr_log_error("[cache] failed to clear stale reader locks: "
+                                       "LMDB error %d %s\n", ret, mdb_strerror(ret));
+               goto retry; /* taken even if the check failed; the attempt limit above bounds the loop */
+       }
         return ret;
  }
author	Vladimír Čunát <vladimir.cunat@nic.cz>
	Wed, 10 Jul 2019 10:34:11 +0000 (12:34 +0200)
committer	Petr Špaček <petr.spacek@nic.cz>
	Wed, 10 Jul 2019 11:54:59 +0000 (13:54 +0200)
NEWS		patch \| blob \| blame \| history
lib/cache/cdb_lmdb.c		patch \| blob \| blame \| history