]> git.ipfire.org Git - thirdparty/openldap.git/commitdiff
Page-level encryption support
authorHoward Chu <hyc@openldap.org>
Thu, 29 Jun 2017 15:18:52 +0000 (16:18 +0100)
committerHoward Chu <hyc@openldap.org>
Sat, 10 Oct 2020 11:51:54 +0000 (12:51 +0100)
Currently encrypts all but the meta pages
Still needs to store/retrieve the initialization vector

libraries/liblmdb/lmdb.h
libraries/liblmdb/mdb.c
libraries/liblmdb/midl.h

index 7eb0a1ea16fa833d525be4020cea4a223b520193..c471706d8b336a5948fbb6f22cb53b8118063284 100644 (file)
@@ -312,11 +312,28 @@ typedef int  (MDB_cmp_func)(const MDB_val *a, const MDB_val *b);
  */
 typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx);
 
+#if MDB_RPAGE_CACHE
+/** @brief A callback function used to encrypt/decrypt pages in the env.
+ *
+ * Encrypt or decrypt the data in src and store the result in dst using the
+ * provided key. The result must be the same number of bytes as the input.
+ * The input size will always be a multiple of the page size.
+ * @param[in] src The input data to be transformed.
+ * @param[out] dst Storage for the result.
+ * @param[in] key An array of two values: key[0] is the encryption key,
+ * and key[1] is the initialization vector.
+ * @param[in] encdec 1 to encrypt, 0 to decrypt.
+ */
+typedef void (MDB_enc_func)(const MDB_val *src, MDB_val *dst, const MDB_val *key, int encdec);
+#endif
+
 /** @defgroup  mdb_env Environment Flags
  *     @{
  */
        /** mmap at a fixed address (experimental) */
 #define MDB_FIXEDMAP   0x01
+       /** encrypted DB - read-only flag, set by #mdb_env_set_encrypt() */
+#define MDB_ENCRYPT            0x2000U
        /** no environment directory */
 #define MDB_NOSUBDIR   0x4000
        /** don't fsync after commit */
@@ -975,6 +992,20 @@ typedef void MDB_assert_func(MDB_env *env, const char *msg);
         */
 int  mdb_env_set_assert(MDB_env *env, MDB_assert_func *func);
 
+#if MDB_RPAGE_CACHE
+       /** @brief Set encryption on an environment.
+        *
+        * This must be called before #mdb_env_open(). It implicitly sets #MDB_REMAP_CHUNK
+        * on the env.
+        * @param[in] env An environment handle returned by #mdb_env_create().
+        * @param[in] func An #MDB_enc_func function.
+        * @param[in] key An array of two values: key[0] is the encryption key,
+        * and key[1] is the initialization vector.
+        * @return A non-zero error value on failure and 0 on success.
+        */
+int mdb_env_set_encrypt(MDB_env *env, MDB_enc_func *func, const MDB_val *key);
+#endif
+
        /** @brief Create a transaction for use with the environment.
         *
         * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
index 81d1649b8d133ce604e50fa8304d367fb00d7ba7..f2c3557fcc0d3ab4dcab1edca2e71cd4ed027c7c 100644 (file)
@@ -1521,6 +1521,8 @@ struct MDB_env {
 #define MDB_ERPAGE_SIZE        16384
 #define MDB_ERPAGE_MAX (MDB_ERPAGE_SIZE-1)
        unsigned int me_rpcheck;
+       MDB_enc_func *me_encfunc;       /**< encrypt env data */
+       MDB_val         me_enckey[2];   /**< key and IV for env encryption */
 #endif
        void            *me_userctx;     /**< User-settable context */
        MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
@@ -1985,7 +1987,7 @@ mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
  * Set #MDB_TXN_ERROR on failure.
  */
 static MDB_page *
-mdb_page_malloc(MDB_txn *txn, unsigned num)
+mdb_page_malloc(MDB_txn *txn, unsigned num, int init)
 {
        MDB_env *env = txn->mt_env;
        MDB_page *ret = env->me_dpages;
@@ -2009,7 +2011,7 @@ mdb_page_malloc(MDB_txn *txn, unsigned num)
        }
        if ((ret = malloc(sz)) != NULL) {
                VGMEMP_ALLOC(env, ret, sz);
-               if (!(env->me_flags & MDB_NOMEMINIT)) {
+               if (init && !(env->me_flags & MDB_NOMEMINIT)) {
                        memset((char *)ret + off, 0, psize);
                        ret->mp_pad = 0;
                }
@@ -2043,6 +2045,24 @@ mdb_dpage_free(MDB_env *env, MDB_page *dp)
        }
 }
 
+#if MDB_RPAGE_CACHE
+/** Free an encrypted dirty page
+ * We can't check if it's an overflow page,
+ * caller must tell us how many are being freed.
+ */
+static void
+mdb_dpage_free_n(MDB_env *env, MDB_page *dp, int num)
+{
+       if (num == 1)
+               mdb_page_free(env, dp);
+       else {
+               /* large pages just get freed directly */
+               VGMEMP_FREE(env, dp);
+               free(dp);
+       }
+}
+#endif
+
 /**    Return all dirty pages to dpage list */
 static void
 mdb_dlist_free(MDB_txn *txn)
@@ -2589,7 +2609,7 @@ search_done:
        if (env->me_flags & MDB_WRITEMAP) {
                np = (MDB_page *)(env->me_map + env->me_psize * pgno);
        } else {
-               if (!(np = mdb_page_malloc(txn, num))) {
+               if (!(np = mdb_page_malloc(txn, num, 1))) {
                        rc = ENOMEM;
                        goto fail;
                }
@@ -2669,7 +2689,7 @@ mdb_page_unspill(MDB_txn *txn, MDB_page *mp, MDB_page **ret)
                        if (env->me_flags & MDB_WRITEMAP) {
                                np = mp;
                        } else {
-                               np = mdb_page_malloc(txn, num);
+                               np = mdb_page_malloc(txn, num, 1);
                                if (!np)
                                        return ENOMEM;
                                if (num > 1)
@@ -2757,7 +2777,7 @@ mdb_page_touch(MDB_cursor *mc)
                }
                mdb_cassert(mc, dl[0].mid < MDB_IDL_UM_MAX);
                /* No - copy it */
-               np = mdb_page_malloc(txn, 1);
+               np = mdb_page_malloc(txn, 1, 1);
                if (!np)
                        return ENOMEM;
                mid.mid = pgno;
@@ -3372,13 +3392,23 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
                        if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) {
                                /* tmp overflow pages that we didn't share in env */
                                munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
+                               if (tl[i].menc) {
+                                       free(tl[i].menc);
+                                       tl[i].menc = NULL;
+                               }
                        } else {
                                x = mdb_mid3l_search(el, tl[i].mid);
                                if (tl[i].mptr == el[x].mptr) {
                                        el[x].mref--;
+                                       if (!el[x].mref)
+                                               el[x].muse = 0;
                                } else {
                                        /* another tmp overflow page */
                                        munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
+                                       if (tl[i].menc) {
+                                               free(tl[i].menc);
+                                               tl[i].menc = NULL;
+                                       }
                                }
                        }
                }
@@ -3628,6 +3658,10 @@ mdb_page_flush(MDB_txn *txn, int keep)
        off_t           pos = 0;
        pgno_t          pgno = 0;
        MDB_page        *dp = NULL;
+#if MDB_RPAGE_CACHE
+       int                     nump = 1;
+       MDB_page        *encp;
+#endif
 #ifdef _WIN32
        OVERLAPPED      ov;
 #else
@@ -3669,11 +3703,33 @@ mdb_page_flush(MDB_txn *txn, int keep)
                        dp->mp_flags &= ~P_DIRTY;
                        pos = pgno * psize;
                        size = psize;
+#if MDB_RPAGE_CACHE
+                       nump = 1;
+                       if (IS_OVERFLOW(dp)) {
+                               nump = dp->mp_pages;
+                               size *= dp->mp_pages;
+                       }
+#else
                        if (IS_OVERFLOW(dp)) size *= dp->mp_pages;
+#endif
                }
 #ifdef _WIN32
                else break;
 
+#if MDB_RPAGE_CACHE
+               if (env->me_encfunc) {
+                       MDB_val in, out;
+                       encp = mdb_page_malloc(txn, nump. 0);
+                       if (!encp)
+                               return ENOMEM;
+                       in.mv_size = size;
+                       in.mv_data = dp;
+                       out.mv_size = size;
+                       out.mv_data = encp;
+                       env->me_encfunc(&in, &out, env->me_enckey, 1);
+                       dp = encp;
+               }
+#endif
                /* Windows actually supports scatter/gather I/O, but only on
                 * unbuffered file handles. Since we're relying on the OS page
                 * cache for all our data, that's self-defeating. So we just
@@ -3685,8 +3741,14 @@ mdb_page_flush(MDB_txn *txn, int keep)
                memset(&ov, 0, sizeof(ov));
                ov.Offset = pos & 0xffffffff;
                ov.OffsetHigh = pos >> 16 >> 16;
-               if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) {
+               rc = 0;
+               if (!WriteFile(env->me_fd, dp, size, NULL, &ov))
                        rc = ErrCode();
+#if MDB_RPAGE_CACHE
+               if (env->me_encfunc)
+                       mdb_dpage_free_n(env, dp, nump);
+#endif
+               if (rc) {
                        DPRINTF(("WriteFile: %d", rc));
                        return rc;
                }
@@ -3695,6 +3757,7 @@ mdb_page_flush(MDB_txn *txn, int keep)
                if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) {
                        if (n) {
 retry_write:
+                               rc = 0;
                                /* Write previous page(s) */
 #ifdef MDB_USE_PWRITEV
                                wres = pwritev(env->me_fd, iov, n, wpos);
@@ -3708,9 +3771,11 @@ retry_seek:
                                                if (rc == EINTR)
                                                        goto retry_seek;
                                                DPRINTF(("lseek: %s", strerror(rc)));
-                                               return rc;
+                                               wres = wsize;
+                                       } else {
+                                               rc = 0;
+                                               wres = writev(env->me_fd, iov, n);
                                        }
-                                       wres = writev(env->me_fd, iov, n);
                                }
 #endif
                                if (wres != wsize) {
@@ -3723,8 +3788,18 @@ retry_seek:
                                                rc = EIO; /* TODO: Use which error code? */
                                                DPUTS("short write, filesystem full?");
                                        }
-                                       return rc;
                                }
+#if MDB_RPAGE_CACHE
+                               if (env->me_encfunc) {
+                                       int j, num1;
+                                       for (j=0; j<n; j++) {
+                                               num1 = 1 + (iov[j].iov_len > psize);
+                                               mdb_dpage_free_n(env, (MDB_page *)iov[j].iov_base, num1);
+                                       }
+                               }
+#endif
+                               if (rc)
+                                       return rc;
                                n = 0;
                        }
                        if (i > pagecount)
@@ -3732,6 +3807,20 @@ retry_seek:
                        wpos = pos;
                        wsize = 0;
                }
+#if MDB_RPAGE_CACHE
+               if (env->me_encfunc) {
+                       MDB_val in, out;
+                       encp = mdb_page_malloc(txn, nump, 0);
+                       if (!encp)
+                               return ENOMEM;
+                       in.mv_size = size;
+                       in.mv_data = dp;
+                       out.mv_size = size;
+                       out.mv_data = encp;
+                       env->me_encfunc(&in, &out, env->me_enckey, 1);
+                       dp = encp;
+               }
+#endif
                DPRINTF(("committing page %"Yu, pgno));
                next_pos = pos + size;
                iov[n].iov_len = size;
@@ -4820,6 +4909,8 @@ mdb_env_open2(MDB_env *env)
                        return i;
                }
        }
+       if ((env->me_flags ^ env->me_metas[0]->mm_flags) & MDB_ENCRYPT)
+               return MDB_INCOMPATIBLE;
 
        env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
        env->me_nodemax = (((env->me_psize - PAGEHDRSZ) / MDB_MINKEYS) & -2)
@@ -5660,6 +5751,9 @@ mdb_env_close(MDB_env *env)
        }
 
        mdb_env_close_active(env, 0);
+#if MDB_RPAGE_CACHE
+       free(env->me_enckey[0].mv_data);
+#endif
        free(env);
 }
 
@@ -5909,6 +6003,8 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
 }
 
 #if MDB_RPAGE_CACHE
+static void mdb_rpage_decrypt(MDB_env *env, MDB_ID3 *id3, int rem, int numpgs);
+
 /** Map a read-only page.
  * There are two levels of tracking in use, a per-txn list and a per-env list.
  * ref'ing and unref'ing the per-txn list is faster since it requires no
@@ -5966,6 +6062,7 @@ mdb_rpage_get(MDB_txn *txn, pgno_t pg0, int numpgs, MDB_page **ret)
        MDB_ID3L tl = txn->mt_rpages;
        MDB_ID3L el = env->me_rpages;
        MDB_ID3 id3;
+       char *base;
        unsigned x, rem;
        pgno_t pgno;
        int rc, retries = 1;
@@ -5994,6 +6091,8 @@ mdb_rpage_get(MDB_txn *txn, pgno_t pg0, int numpgs, MDB_page **ret)
        pgno = pg0 ^ rem;
 
        id3.mid = 0;
+       id3.menc = NULL;
+       id3.muse = 0;
        x = mdb_mid3l_search(tl, pgno);
        if (x <= tl[0].mid && tl[x].mid == pgno) {
                if (x != tl[0].mid && tl[x+1].mid == pg0)
@@ -6006,6 +6105,12 @@ mdb_rpage_get(MDB_txn *txn, pgno_t pg0, int numpgs, MDB_page **ret)
                        MAP(rc, env, id3.mptr, len, off);
                        if (rc)
                                return rc;
+                       /* setup for encryption */
+                       if (env->me_encfunc) {
+                               id3.menc = malloc(len);
+                               if (!id3.menc)
+                                       return ENOMEM;
+                       }
                        /* check for local-only page */
                        if (rem) {
                                mdb_tassert(txn, tl[x].mid != pg0);
@@ -6019,6 +6124,10 @@ mdb_rpage_get(MDB_txn *txn, pgno_t pg0, int numpgs, MDB_page **ret)
                                /* ignore the mapping we got from env, use new one */
                                tl[x].mptr = id3.mptr;
                                tl[x].mcnt = id3.mcnt;
+                               if (tl[x].menc)
+                                       free(tl[x].menc);
+                               tl[x].menc = id3.menc;
+                               tl[x].muse = id3.muse;
                                /* if no active ref, see if we can replace in env */
                                if (!tl[x].mref) {
                                        unsigned i;
@@ -6029,6 +6138,10 @@ mdb_rpage_get(MDB_txn *txn, pgno_t pg0, int numpgs, MDB_page **ret)
                                                munmap(el[i].mptr, el[i].mcnt * env->me_psize);
                                                el[i].mptr = tl[x].mptr;
                                                el[i].mcnt = tl[x].mcnt;
+                                               if (el[i].menc)
+                                                       free(el[i].menc);
+                                               el[i].menc = tl[x].menc;
+                                               el[i].muse = tl[x].muse;
                                        } else {
                                                /* there are others, remove ourself */
                                                el[i].mref--;
@@ -6039,7 +6152,13 @@ mdb_rpage_get(MDB_txn *txn, pgno_t pg0, int numpgs, MDB_page **ret)
                }
                id3.mptr = tl[x].mptr;
                id3.mcnt = tl[x].mcnt;
+               id3.menc = tl[x].menc;
+               id3.muse = tl[x].muse;
                tl[x].mref++;
+               if (env->me_encfunc) {
+                       mdb_rpage_decrypt(env, &id3, rem, numpgs);
+                       tl[x].muse = id3.muse;
+               }
                goto ok;
        }
 
@@ -6056,6 +6175,8 @@ retry:
                                /* tmp overflow pages don't go to env */
                                if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) {
                                        munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
+                                       if (tl[i].menc)
+                                               free(tl[i].menc);
                                        continue;
                                }
                                x = mdb_mid3l_search(el, tl[i].mid);
@@ -6103,23 +6224,45 @@ retry:
                x = mdb_mid3l_search(el, pgno);
                if (x <= el[0].mid && el[x].mid == pgno) {
                        id3.mptr = el[x].mptr;
+                       id3.menc = el[x].menc;
+                       id3.muse = el[x].muse;
                        /* check for overflow size */
                        if (id3.mcnt > el[x].mcnt) {
                                SET_OFF(off, pgno * env->me_psize);
                                MAP(rc, env, id3.mptr, len, off);
                                if (rc)
                                        goto fail;
+                               if (env->me_encfunc) {
+                                       id3.menc = malloc(len);
+                                       if (!id3.menc) {
+                                               rc = ENOMEM;
+                                               goto fail;
+                                       }
+                                       id3.muse = 0;
+                               }
                                if (!el[x].mref) {
                                        munmap(el[x].mptr, env->me_psize * el[x].mcnt);
                                        el[x].mptr = id3.mptr;
                                        el[x].mcnt = id3.mcnt;
+                                       if (el[x].menc)
+                                               free(el[x].menc);
+                                       el[x].menc = id3.menc;
+                                       el[x].muse = id3.muse;
                                } else {
                                        id3.mid = pg0;
+                                       if (env->me_encfunc) {
+                                               mdb_rpage_decrypt(env, &id3, rem, numpgs);
+                                               el[x].muse = id3.muse;
+                                       }
                                        pthread_mutex_unlock(&env->me_rpmutex);
                                        goto found;
                                }
                        }
                        el[x].mref++;
+                       if (env->me_encfunc) {
+                               mdb_rpage_decrypt(env, &id3, rem, numpgs);
+                               el[x].muse = id3.muse;
+                       }
                        pthread_mutex_unlock(&env->me_rpmutex);
                        goto found;
                }
@@ -6130,6 +6273,8 @@ retry:
                                if (!el[i].mref) {
                                        if (!y) y = i;
                                        munmap(el[i].mptr, env->me_psize * el[i].mcnt);
+                                       if (el[i].menc)
+                                               free(el[i].menc);
                                }
                        }
                        if (!y) {
@@ -6162,6 +6307,14 @@ fail:
                        pthread_mutex_unlock(&env->me_rpmutex);
                        return rc;
                }
+               if (env->me_encfunc) {
+                       id3.menc = malloc(len);
+                       if (!id3.menc) {
+                               rc = ENOMEM;
+                               goto fail;
+                       }
+                       mdb_rpage_decrypt(env, &id3, rem, numpgs);
+               }
                mdb_mid3l_insert(el, &id3);
                pthread_mutex_unlock(&env->me_rpmutex);
 found:
@@ -6170,7 +6323,10 @@ found:
                return MDB_TXN_FULL;
        }
 ok:
-       p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
+       base = (char *)(env->me_encfunc ? id3.menc : id3.mptr);
+       p = (MDB_page *)(base + rem * env->me_psize);
+       if (env->me_encfunc)
+               mdb_rpage_decrypt(env, &id3, rem, numpgs);
 #if MDB_DEBUG  /* we don't need this check any more */
        if (IS_OVERFLOW(p)) {
                mdb_tassert(txn, p->mp_pages + rem <= id3.mcnt);
@@ -6179,6 +6335,19 @@ ok:
        *ret = p;
        return MDB_SUCCESS;
 }
+
+static void mdb_rpage_decrypt(MDB_env *env, MDB_ID3 *id3, int rem, int numpgs)
+{
+       if (!(id3->muse & (1 << rem))) {
+               MDB_val in, out;
+               id3->muse |= (1 << rem);
+               in.mv_size = numpgs * env->me_psize;
+               in.mv_data = (char *)id3->mptr + rem * env->me_psize;
+               out.mv_size = in.mv_size;
+               out.mv_data = (char *)id3->menc + rem * env->me_psize;
+               env->me_encfunc(&in, &out, env->me_enckey, 0);
+       }
+}
 #endif
 
 /** Find the address of the page corresponding to a given page number.
@@ -6557,15 +6726,14 @@ mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data)
                MDB_PAGE_UNREF(mc->mc_txn, MC_OVPG(mc));
                MC_SET_OVPG(mc, NULL);
        }
+       data->mv_size = NODEDSZ(leaf);
        if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
-               data->mv_size = NODEDSZ(leaf);
                data->mv_data = NODEDATA(leaf);
                return MDB_SUCCESS;
        }
 
        /* Read overflow data.
         */
-       data->mv_size = NODEDSZ(leaf);
        memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
        {
 #if MDB_RPAGE_CACHE
@@ -7702,7 +7870,7 @@ current:
                                if (level > 1) {
                                        /* It is writable only in a parent txn */
                                        size_t sz = (size_t) env->me_psize * ovpages, off;
-                                       MDB_page *np = mdb_page_malloc(mc->mc_txn, ovpages);
+                                       MDB_page *np = mdb_page_malloc(mc->mc_txn, ovpages, 1);
                                        MDB_ID2 id2;
                                        if (!np)
                                                return ENOMEM;
@@ -9556,7 +9724,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
                        nsize = EVEN(nsize);
 
                        /* grab a page to hold a temporary copy */
-                       copy = mdb_page_malloc(mc->mc_txn, 1);
+                       copy = mdb_page_malloc(mc->mc_txn, 1, 1);
                        if (copy == NULL) {
                                rc = ENOMEM;
                                goto done;
@@ -10474,6 +10642,33 @@ mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
        return MDB_SUCCESS;
 }
 
+#if MDB_RPAGE_CACHE
+int ESECT
+mdb_env_set_encrypt(MDB_env *env, MDB_enc_func *func, const MDB_val *key)
+{
+       char *kdata, *ivdata;
+
+       if (!env || !func || !key)
+               return EINVAL;
+       if (env->me_flags & MDB_ENV_ACTIVE)
+               return EINVAL;
+       if (! (kdata = malloc(key[0].mv_size + key[1].mv_size)))
+               return ENOMEM;
+
+       ivdata = kdata + key[0].mv_size;
+       memcpy(kdata,  key[0].mv_data, key[0].mv_size);
+       memcpy(ivdata, key[1].mv_data, key[1].mv_size);
+       free(env->me_enckey[0].mv_data);
+       env->me_enckey[0].mv_data = kdata;
+       env->me_enckey[0].mv_size = key[0].mv_size;
+       env->me_enckey[1].mv_data = ivdata;
+       env->me_enckey[1].mv_size = key[1].mv_size;
+       env->me_encfunc = func;
+       env->me_flags |= MDB_REMAP_CHUNKS | MDB_ENCRYPT;
+       return MDB_SUCCESS;
+}
+#endif
+
 int ESECT
 mdb_env_get_path(MDB_env *env, const char **arg)
 {
index ac6bdd0ecbd77cc04105e4bd186a0ae0fddabe69..1a603da332e8cb82cd97210105dab537d6ef4cf4 100644 (file)
@@ -182,8 +182,10 @@ int mdb_mid2l_append( MDB_ID2L ids, MDB_ID2 *id );
 typedef struct MDB_ID3 {
        MDB_ID mid;             /**< The ID */
        void *mptr;             /**< The pointer */
+       void *menc;             /**< Decrypted pointer */
        unsigned int mcnt;              /**< Number of pages */
-       unsigned int mref;              /**< Refcounter */
+       unsigned short mref;    /**< Refcounter */
+       unsigned short muse;    /**< Bitmap of used pages */
 } MDB_ID3;
 
 typedef MDB_ID3 *MDB_ID3L;