From: Hallvard Furuseth Date: Tue, 25 Jul 2017 19:27:36 +0000 (+0200) Subject: Drop P_DIRTY, and MDB_WRITEMAP dirty/spill lists X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=adfa8f758adce1233e8a87bb52a0e41a7ab02c8b;p=thirdparty%2Fopenldap.git Drop P_DIRTY, and MDB_WRITEMAP dirty/spill lists mt_workid = mt_txnid when WRITEMAP, so dirty pages == "spilled" pages and mdb_page_flush() does nothing. --- diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 1d051e3ba0..4dd4f6e7a2 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -975,7 +975,10 @@ typedef struct MDB_page_header { * Thus an #MDB_txn can write to pages with mp_txnid >= txn.mt_workid. * A page with smaller mp_txnid is dirty in an ancestor txn or clean. * - * txn.mt_workid > txn.mt_txnid, to tell apart spilled and dirty pages. + * Non-#MDB_WRITEMAP sets txn.mt_workid > txn.mt_txnid, to tell apart + * spilled and dirty pages. WRITEMAP sets mt_workid = mt_txnid, since + * it does not copy/spill pages. Thus (page.mp_txnid == txn.mt_txnid) + * says "spilled page" without WRITEMAP, "dirty page" with WRITEMAP. * * Finally, ((dirty page).mp_txnid & #MDB_PGTXNID_FLAGMASK) can be used * for flags with non-WRITEMAP; it keeps low bits in workid = 0. @@ -992,14 +995,13 @@ typedef struct MDB_page_header { #define P_LEAF 0x02 /**< leaf page */ #define P_OVERFLOW 0x04 /**< overflow page */ #define P_META 0x08 /**< meta page */ -#define P_DIRTY 0x10 /**< dirty page, also set for #P_SUBP pages */ #define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */ #define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */ #define P_DIRTY_OVF 0x2000 /**< page has dirty overflow nodes */ #define P_LOOSE 0x4000 /**< page was dirtied then freed, can be reused */ #define P_KEEP 0x8000 /**< leave this page alone during spill */ /** Persistent flags for page administration rather than page contents */ -#define P_ADM_FLAGS (P_DIRTY) +#define P_ADM_FLAGS 0 /* later... 
*/ /** @} */ uint16_t mh_flags; /**< @ref mdb_page */ #define mp_lower mp_pb.pb.pb_lower @@ -1058,6 +1060,8 @@ typedef struct MDB_page { /** Test if a page is a sub page */ #define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP) + /** Test if (this non-sub page is dirty && env is non-#MDB_WRITEMAP) */ +#define IS_DIRTY_NW(txn, p) ((p)->mp_txnid > (txn)->mt_txnid) /** Test if this non-sub page belongs to the current snapshot */ #define IS_MUTABLE(txn, p) ((p)->mp_txnid >= (txn)->mt_txnid) /** Test if this non-sub page is writable in this txn (not an ancestor) */ @@ -1331,7 +1335,7 @@ struct MDB_txn { /** Written to mp_txnid of dirty pages, to be fixed by #mdb_page_flush(). * - * Value >= 1 + (parent ? parent.last_workid : txnid). + * Value >= WRITEMAP ? txnid : 1 + (parent ? parent.last_workid : txnid). * See #MDB_page.%mp_txnid. * * An MDB_txn can write to a page when page.mp_txnid >= txn.mt_workid. @@ -1358,10 +1362,11 @@ struct MDB_txn { /** The sorted list of dirty pages we temporarily wrote to disk * because the dirty list was full. page numbers in here are * shifted left by 1, deleted slots have the LSB set. + * Unused with #MDB_WRITEMAP, which does not use a dirty list. */ MDB_IDL mt_spill_pgs; union { - /** For write txns: Modified pages. Sorted when not MDB_WRITEMAP. */ + /** For write txns: Modified pages, sorted. Unused when MDB_WRITEMAP. */ MDB_ID2L dirty_list; /** For read txns: This thread/txn's reader table slot, or NULL. */ MDB_reader *reader; @@ -1434,6 +1439,7 @@ struct MDB_txn { * Includes ancestor txns' dirty pages not hidden by other txns' * dirty/spilled pages. Thus commit(nested txn) has room to merge * dirty_list into mt_parent after freeing hidden mt_parent pages. + * When #MDB_WRITEMAP, it is nonzero but otherwise irrelevant. 
*/ unsigned int mt_dirty_room; }; @@ -1586,7 +1592,9 @@ struct MDB_env { MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */ /** IDL of pages that became unused in a write txn */ MDB_IDL me_free_pgs; - /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. */ + /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. + * Unused except for a dummy element when #MDB_WRITEMAP. + */ MDB_ID2L me_dirty_list; /** Max number of freelist items that can fit in a single overflow page */ int me_maxfree_1pg; @@ -1919,7 +1927,7 @@ void mdb_page_list(MDB_page *mp) { pgno_t pgno = mdb_dbg_pgno(mp); - const char *type, *state = (mp->mp_flags & P_DIRTY) ? ", dirty" : ""; + const char *type; MDB_node *node; unsigned int i, nkeys, nsize, total = 0; MDB_val key; @@ -1932,8 +1940,7 @@ mdb_page_list(MDB_page *mp) case P_LEAF|P_LEAF2: type = "LEAF2 page"; break; case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break; case P_OVERFLOW: - fprintf(stderr, "Overflow page %"Yu" pages %u%s\n", - pgno, mp->mp_pages, state); + fprintf(stderr, "Overflow page %"Yu" pages %u\n", pgno, mp->mp_pages); return; case P_META: fprintf(stderr, "Meta-page %"Yu" txnid %"Yu"\n", @@ -1945,7 +1952,7 @@ mdb_page_list(MDB_page *mp) } nkeys = NUMKEYS(mp); - fprintf(stderr, "%s %"Yu" numkeys %d%s\n", type, pgno, nkeys, state); + fprintf(stderr, "%s %"Yu" numkeys %d\n", type, pgno, nkeys); for (i=0; imt_rpages; unsigned x, rem; - if (mp->mp_flags & (P_SUBP|P_DIRTY)) + if (IS_SUBP(mp) || IS_DIRTY_NW(txn, mp)) return; rem = mp->mp_pgno & (MDB_RPAGE_CHUNK-1); pgno = mp->mp_pgno ^ rem; @@ -2264,14 +2271,14 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp) /** Set or clear P_KEEP in dirty, non-overflow, non-sub pages watched by txn. * @param[in] mc A cursor handle for the current operation. * @param[in] pflags Flags of the pages to update: - * P_DIRTY to set P_KEEP, P_DIRTY|P_KEEP to clear it. + * 0 to set P_KEEP, P_KEEP to clear it. * @param[in] all No shortcuts. 
Needed except after a full #mdb_page_flush(). * @return 0 on success, non-zero on failure. */ static int mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) { - enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP }; + enum { Mask = P_SUBP|P_LOOSE|P_KEEP }; MDB_txn *txn = mc->mc_txn; MDB_cursor *m3, *m0 = mc; MDB_xcursor *mx; @@ -2288,6 +2295,7 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all) for (j=0; j<m3->mc_snum; j++) { mp = m3->mc_pg[j]; if ((mp->mp_flags & Mask) == pflags) + if (IS_DIRTY_NW(txn, mp)) mp->mp_flags ^= P_KEEP; } mx = m3->mc_xcursor; @@ -2343,7 +2351,7 @@ static int mdb_page_flush(MDB_txn *txn, int keep); * * Otherwise, if not using nested txns, it is expected that apps will * not run into #MDB_TXN_FULL any more. The pages are flushed to disk - * the same way as for a txn commit, e.g. their P_DIRTY flag is cleared. + * the same way as for a txn commit. * If the txn never references them again, they can be left alone. * If the txn only reads them, they can be used without any fuss. * If the txn writes them again, they can be dirtied immediately without @@ -2372,7 +2380,7 @@ mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data) unsigned int i, j, need; int rc; - if (m0->mc_flags & C_SUB) + if (m0->mc_flags & (C_SUB|C_WRITEMAP)) return MDB_SUCCESS; /* Estimate how much space this op will take */ @@ -2406,7 +2414,7 @@ mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data) } /* Preserve pages which may soon be dirtied again */ - if ((rc = mdb_pages_xkeep(m0, P_DIRTY, 1)) != MDB_SUCCESS) + if ((rc = mdb_pages_xkeep(m0, 0, 1)) != MDB_SUCCESS) goto done; /* Less aggressive spill - we originally spilled the entire dirty list, @@ -2454,7 +2462,7 @@ mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data) goto done; /* Reset any dirty pages we kept that page_flush didn't see */ - rc = mdb_pages_xkeep(m0, P_DIRTY|P_KEEP, i); + rc = mdb_pages_xkeep(m0, P_KEEP, i); done: txn->mt_flags |= rc ? 
MDB_TXN_ERROR : MDB_TXN_SPILLS; @@ -2480,23 +2488,20 @@ mdb_find_oldest(MDB_txn *txn) return oldest; } -/** Mark a page as dirty and add it to the txn's dirty list */ +/** Add a page to the txn's dirty list, if there is one */ static void mdb_page_dirty(MDB_txn *txn, MDB_page *mp) { MDB_ID2 mid; - int rc, (*insert)(MDB_ID2L, MDB_ID2 *); - - mp->mp_flags |= P_DIRTY; + int rc; if (txn->mt_flags & MDB_TXN_WRITEMAP) { - insert = mdb_mid2l_append; - } else { - insert = mdb_mid2l_insert; + txn->mt_flags |= MDB_TXN_DIRTY; + return; } mid.mid = mp->mp_pgno; mid.mptr = mp; - rc = insert(txn->mt_u.dirty_list, &mid); + rc = mdb_mid2l_insert(txn->mt_u.dirty_list, &mid); mdb_tassert(txn, rc == 0); txn->mt_dirty_room--; } @@ -2806,9 +2811,7 @@ mdb_page_unspill(MDB_txn *txn, MDB_page *mp, MDB_page **ret) num = mp->mp_pages; else num = 1; - if (env->me_flags & MDB_WRITEMAP) { - np = mp; - } else { + { np = mdb_page_malloc(txn, num, 1); if (!np) return ENOMEM; @@ -2873,7 +2876,7 @@ mdb_page_touch(MDB_cursor *mc) } else { mc->mc_db->md_root = pgno; } - } else if (!F_ISSET(mp->mp_flags, P_DIRTY)) { + } else if (!IS_DIRTY_NW(txn, mp)) { rc = mdb_page_unspill(txn, mp, &np); if (rc) goto fail; @@ -2896,7 +2899,6 @@ mdb_page_touch(MDB_cursor *mc) mid.mptr = np; rc = mdb_mid2l_insert(dl, &mid); mdb_cassert(mc, rc == 0); - np->mp_flags |= P_DIRTY; } np_flags = np->mp_flags; /* P_ADM_FLAGS */ @@ -3201,9 +3203,14 @@ mdb_txn_renew0(MDB_txn *txn) txn->mt_child = NULL; txn->mt_loose_pgs = NULL; txn->mt_loose_count = 0; - txn->mt_workid = (txn->mt_txnid | MDB_PGTXNID_FLAGMASK) + 1; + if (env->me_flags & MDB_WRITEMAP) { + txn->mt_workid = txn->mt_txnid; + txn->mt_dirty_room = 1; + } else { + txn->mt_workid = (txn->mt_txnid | MDB_PGTXNID_FLAGMASK) + 1; + txn->mt_dirty_room = MDB_IDL_UM_MAX; + } txn->mt_last_workid = txn->mt_workid; - txn->mt_dirty_room = MDB_IDL_UM_MAX; txn->mt_u.dirty_list = env->me_dirty_list; txn->mt_u.dirty_list[0].mid = 0; txn->mt_free_pgs = env->me_free_pgs; @@ -3845,18 
+3852,6 @@ mdb_page_flush(MDB_txn *txn, int keep) j = i = keep; if (env->me_flags & MDB_WRITEMAP) { - /* Mark the pages as clean */ - while (++i <= pagecount) { - dp = dl[i].mptr; - /* Don't flush this page yet */ - if (dp->mp_flags & (P_LOOSE|P_KEEP)) { - dp->mp_flags &= ~P_KEEP; - dl[++j] = dl[i]; - continue; - } - dp->mp_txnid = txn->mt_txnid; - dp->mp_flags &= ~P_DIRTY; - } goto done; } @@ -3873,7 +3868,6 @@ mdb_page_flush(MDB_txn *txn, int keep) pgno = dl[i].mid; /* Mark the page as clean */ dp->mp_txnid = txn->mt_txnid; - dp->mp_flags &= ~P_DIRTY; pos = pgno * psize; size = psize; #if MDB_RPAGE_CACHE @@ -4023,11 +4017,11 @@ retry_seek: } mdb_dpage_free(env, dp); } - -done: i--; txn->mt_dirty_room += i - j; dl[0].mid = j; + +done: return MDB_SUCCESS; } @@ -5765,8 +5759,10 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode /* silently ignore WRITEMAP when we're only getting read access */ flags &= ~MDB_WRITEMAP; } else { + /* WRITEMAP has a dummy element to match dirty_room = 1 */ + size_t dl_size = (flags & MDB_WRITEMAP) ? 2 : MDB_IDL_UM_SIZE; if (!((env->me_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX)) && - (env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) + (env->me_dirty_list = calloc(dl_size, sizeof(MDB_ID2))))) rc = ENOMEM; } @@ -6956,18 +6952,19 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp) rc = mdb_midl_need(&env->me_pghead, ovpages); if (rc) return rc; - if (!(mp->mp_flags & P_DIRTY)) { - MDB_IDL sl = txn->mt_spill_pgs; - if (sl) - x = mdb_midl_search(sl, pn); - if (! (sl && x <= sl[0] && sl[x] == pn)) + if (!IS_DIRTY_NW(txn, mp)) { /* spilled or WRITEMAP */ + MDB_IDL sl = txn->mt_spill_pgs; + if (sl) { + x = mdb_midl_search(sl, pn); + if (! 
(x <= sl[0] && sl[x] == pn)) return MDB_PROBLEM; /* This page is no longer spilled */ if (x == sl[0]) sl[0]--; else sl[x] |= 1; - goto release; + } + goto release; } /* Remove from dirty list */ dl = txn->mt_u.dirty_list; @@ -8037,7 +8034,7 @@ more: dkey.mv_data = memcpy(fp+1, olddata.mv_data, olddata.mv_size); /* Make sub-page header for the dup items, with dummy body */ - fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP; + fp->mp_flags = P_LEAF|P_SUBP; fp->mp_lower = (PAGEHDRSZ-PAGEBASE); xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size; if (mc->mc_db->md_flags & MDB_DUPFIXED) { @@ -8153,7 +8150,7 @@ current: * is smaller than the overflow threshold. */ if (!IS_WRITABLE(mc->mc_txn, omp)) { - if (!(omp->mp_flags & P_DIRTY)) { + if (!IS_DIRTY_NW(mc->mc_txn, omp)) { rc = mdb_page_unspill(mc->mc_txn, omp, &omp); if (rc) return rc;