lib/cache: abort() if emergency cache-clear fails

author Vladimír Čunát <vladimir.cunat@nic.cz>

Tue, 18 Aug 2020 16:45:28 +0000 (18:45 +0200)

committer Petr Špaček <petr.spacek@nic.cz>

Mon, 7 Sep 2020 15:47:11 +0000 (17:47 +0200)
author Vladimír Čunát <vladimir.cunat@nic.cz>
Tue, 18 Aug 2020 16:45:28 +0000 (18:45 +0200)
committer Petr Špaček <petr.spacek@nic.cz>
Mon, 7 Sep 2020 15:47:11 +0000 (17:47 +0200)
diff --git a/lib/cache/api.c b/lib/cache/api.c

index 17da2c10618690f8742f492a2f7ed5103c0a84fc..145afb12688f22b78a2a863714078cb2495b67d7 100644 (file)
--- a/lib/cache/api.c
+++ b/lib/cache/api.c
@@ -72,7 +72,7 @@ static int assert_right_version(struct kr_cache *cache)
         int ret = cache_op(cache, read, &key, &val, 1);
         if (ret == 0 && val.len == sizeof(CACHE_VERSION)
             && memcmp(val.data, &CACHE_VERSION, sizeof(CACHE_VERSION)) == 0) {
-               ret = kr_error(EEXIST);
+               ret = kr_ok();
         } else {
                 int oldret = ret;
                 /* Version doesn't match. Recreate cache and write version key. */
diff --git a/lib/cache/api.h b/lib/cache/api.h

index 6250f904638f0d4cf0d032a15fc6cc8d03a23cae..85bf8d90ae2c89b234b30c304548a722e82e1683 100644 (file)
--- a/lib/cache/api.h
+++ b/lib/cache/api.h
@@ -95,7 +95,8 @@ int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const kno
  /**
   * Clear all items from the cache.
   * @param cache cache structure
- * @return 0 or an errcode
+ * @return 0 on success.  A nonzero return signals a big problem - you probably want to abort(),
+ *     except perhaps for kr_error(EAGAIN), which probably indicates a transient error.
   */
  KR_EXPORT
  int kr_cache_clear(struct kr_cache *cache);
diff --git a/lib/cache/cdb_lmdb.c b/lib/cache/cdb_lmdb.c

index 796c3c4204efee5047c1aff300d020d916b8b131..81708ee5c73ccf6a7766e6144b79f7968e67432c 100644 (file)
--- a/lib/cache/cdb_lmdb.c
+++ b/lib/cache/cdb_lmdb.c
@@ -484,10 +484,11 @@ static int cdb_clear(knot_db_t *db, struct kr_cdb_stats *stats)
         /* Find if we get a lock on lockfile. */
         ret = open(lockfile, O_CREAT|O_EXCL|O_RDONLY, S_IRUSR);
         if (ret == -1) {
-               kr_log_error("[cache] clearing failed to get ./.cachelock; retry later\n");
+               kr_log_error("[cache] clearing failed to get ./.cachelock (%s); retry later\n",
+                               strerror(errno));
                 /* As we're out of space (almost certainly - mdb_drop didn't work),
                  * we will retry on the next failing write operation. */
-               return kr_error(errno);
+               return kr_error(EAGAIN);
         }
         close(ret);
  
diff --git a/lib/cache/entry_list.c b/lib/cache/entry_list.c

index de90795518849a7d6afb78bc1e514e0f0058a6db..39c57dae54dd0003291bce91685ed6d828381a71 100644 (file)
--- a/lib/cache/entry_list.c
+++ b/lib/cache/entry_list.c
@@ -168,20 +168,25 @@ static int cache_write_or_clear(struct kr_cache *cache, const knot_db_val_t *key
  {
         int ret = cache_op(cache, write, key, val, 1);
         if (!ret) return kr_ok();
-       /* Clear cache if overfull.  Using kres-cache-gc service should prevent this. */
-       if (ret == kr_error(ENOSPC)) {
-               ret = kr_cache_clear(cache);
-               const char *msg = "[cache] clearing because overfull, ret = %d\n";
-               if (ret) {
-                       kr_log_error(msg, ret);
-               } else {
-                       kr_log_info(msg, ret);
-                       ret = kr_error(ENOSPC);
-               }
-               return ret;
+
+       if (ret != kr_error(ENOSPC)) { /* failing a write isn't too bad */
+               VERBOSE_MSG(qry, "=> failed backend write, ret = %d\n", ret);
+               return kr_error(ret);
+       }
+
+       /* Cache is overfull.  Using kres-cache-gc service should prevent this.
+        * As a fallback, try clearing it. */
+       ret = kr_cache_clear(cache);
+       switch (ret) {
+       default:
+               kr_log_error("CRITICAL: clearing cache failed with %s\n",
+                               kr_strerror(ret));
+               abort();
+       case 0:
+               kr_log_info("[cache] overfull cache cleared\n");
+       case -EAGAIN: // fall-through; .cachelock race -> retry later
+               return kr_error(ENOSPC);
         }
-       VERBOSE_MSG(qry, "=> failed backend write, ret = %d\n", ret);
-       return kr_error(ret ? ret : ENOSPC);
  }
author	Vladimír Čunát <vladimir.cunat@nic.cz>
	Tue, 18 Aug 2020 16:45:28 +0000 (18:45 +0200)
committer	Petr Špaček <petr.spacek@nic.cz>
	Mon, 7 Sep 2020 15:47:11 +0000 (17:47 +0200)
lib/cache/api.c		patch | blob | blame | history
lib/cache/api.h		patch | blob | blame | history
lib/cache/cdb_lmdb.c		patch | blob | blame | history
lib/cache/entry_list.c		patch | blob | blame | history