From: Vladimír Čunát Date: Mon, 28 Aug 2023 09:10:53 +0000 (+0200) Subject: MDB_DUPSORT for rules X-Git-Tag: v6.0.4~1^2~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c72e6f0dca6f11c24a3afd907b9161ee426aa71c;p=thirdparty%2Fknot-resolver.git MDB_DUPSORT for rules This doesn't yet search among the multiple values. The rules/api parts that write rules were adapted or commented on. --- diff --git a/lib/cache/cdb_lmdb.c b/lib/cache/cdb_lmdb.c index a9c29f9a6..96cc2cd0e 100644 --- a/lib/cache/cdb_lmdb.c +++ b/lib/cache/cdb_lmdb.c @@ -386,11 +386,7 @@ static int cdb_open_env(struct lmdb_env *env, const char *path, const size_t map ret = mdb_txn_begin(env->env, NULL, 0, &txn); if (ret != MDB_SUCCESS) goto error_mdb; - - //FIXME: perhaps we want MDB_DUPSORT in future, - // but for that we'd have to avoid MDB_RESERVE. - // (including a proper assertion, instead of sometimes-crash inside lmdb) - const unsigned dbi_flags = 0; //is_cache ? 0 : MDB_DUPSORT; + const unsigned dbi_flags = env->is_cache ? 0 : MDB_DUPSORT; ret = mdb_dbi_open(txn, NULL, dbi_flags, &env->dbi); if (ret != MDB_SUCCESS) { mdb_txn_abort(txn); @@ -675,17 +671,34 @@ static int cdb_readv(kr_cdb_pt db, struct kr_cdb_stats *stats, } static int cdb_write(struct lmdb_env *env, MDB_txn **txn, const knot_db_val_t *key, - knot_db_val_t *val, unsigned flags, - struct kr_cdb_stats *stats) + knot_db_val_t *val, struct kr_cdb_stats *stats) { /* Convert key structs and write */ MDB_val _key = val_knot2mdb(*key); MDB_val _val = val_knot2mdb(*val); stats->write++; - int ret = mdb_put(*txn, env->dbi, &_key, &_val, flags); + /* This is LMDB specific optimisation, + * if caller specifies value with NULL data and non-zero length, + * LMDB will preallocate the entry for caller and leave write + * transaction open, caller is responsible for syncing thus committing transaction. + */ + unsigned mdb_flags = 0; + if (val->len > 0 && val->data == NULL) { + if (kr_fails_assert(env->is_cache)) + return kr_error(EINVAL); // incompatible with MDB_DUPSORT + mdb_flags |= MDB_RESERVE; + } + /* If the same key-value pair was there, we do not add yet another copy + * and consider it kr_ok() and ignore MDB_KEYEXIST. */ + if (!env->is_cache) + mdb_flags |= MDB_NODUPDATA; + + int ret = mdb_put(*txn, env->dbi, &_key, &_val, mdb_flags); + + const bool is_dup = !env->is_cache && ret == MDB_KEYEXIST; /* We don't try to recover from MDB_TXN_FULL. */ - if (ret != MDB_SUCCESS) { + if (ret != MDB_SUCCESS && !is_dup) { txn_abort(env); return lmdb_error(env, ret); } @@ -703,18 +716,8 @@ static int cdb_writev(kr_cdb_pt db, struct kr_cdb_stats *stats, MDB_txn *txn = NULL; int ret = txn_get(env, &txn, false); - for (int i = 0; ret == kr_ok() && i < maxcount; ++i) { - /* This is LMDB specific optimisation, - * if caller specifies value with NULL data and non-zero length, - * LMDB will preallocate the entry for caller and leave write - * transaction open, caller is responsible for syncing thus committing transaction. - */ - unsigned mdb_flags = 0; - if (val[i].len > 0 && val[i].data == NULL) { - mdb_flags |= MDB_RESERVE; - } - ret = cdb_write(env, &txn, &key[i], &val[i], mdb_flags, stats); - } + for (int i = 0; ret == kr_ok() && i < maxcount; ++i) + ret = cdb_write(env, &txn, &key[i], &val[i], stats); return ret; } @@ -729,9 +732,8 @@ static int cdb_remove(kr_cdb_pt db, struct kr_cdb_stats *stats, for (int i = 0; ret == kr_ok() && i < maxcount; ++i) { MDB_val _key = val_knot2mdb(keys[i]); - MDB_val val = { 0, NULL }; stats->remove++; - ret = lmdb_error(env, mdb_del(txn, env->dbi, &_key, &val)); + ret = lmdb_error(env, mdb_del(txn, env->dbi, &_key, NULL)); if (ret == kr_ok()) deleted++; else if (ret == KNOT_ENOENT) { @@ -860,7 +862,8 @@ static int cdb_read_less(kr_cdb_pt db, struct kr_cdb_stats *stats, MDB_val key2_m = val_knot2mdb(*key); MDB_val val2_m = { 0, NULL }; stats->read_less++; - ret = mdb_cursor_get(curs, &key2_m, &val2_m, MDB_PREV); + // It could keep on the same `key` when MDB_PREV was used. + ret = mdb_cursor_get(curs, &key2_m, &val2_m, MDB_PREV_NODUP); if (!ret) { /* finalize the output */ *key = val_mdb2knot(key2_m); diff --git a/lib/rules/api.c b/lib/rules/api.c index 84339e6f4..238eeda78 100644 --- a/lib/rules/api.c +++ b/lib/rules/api.c @@ -64,7 +64,7 @@ static int tag_names_default(void) kr_rule_tags_t empty = 0; val.data = ∅ val.len = sizeof(empty); - return ruledb_op(write, &key, &val, 1); + return ruledb_op(write, &key, &val, 1); // we got ENOENT, so simple write is OK } int kr_rule_tag_add(const char *tag, kr_rule_tags_t *tagset) @@ -117,17 +117,19 @@ int kr_rule_tag_add(const char *tag, kr_rule_tags_t *tagset) const kr_rule_tags_t tag_new = 1 << ix; kr_require((tag_new & bmp) == 0); - // Update the mappings + // Update the bitmap. ATM ruledb does not overwrite, so we `remove` before `write`. bmp |= tag_new; val.data = &bmp; val.len = sizeof(bmp); + ret = ruledb_op(remove, &key_tb, 1); kr_assert(ret == 1); ret = ruledb_op(write, &key_tb, &val, 1); if (ret != 0) return kr_error(ret); + // Record this tag's mapping. uint8_t ix_8t = ix; val.data = &ix_8t; val.len = sizeof(ix_8t); - ret = ruledb_op(write, &key, &val, 1); // key remained correct + ret = ruledb_op(write, &key, &val, 1); // key remained correct since ENOENT if (ret != 0) return kr_error(ret); *tagset |= tag_new; @@ -180,6 +182,7 @@ int kr_rules_init(const char *path, size_t maxsize) uint8_t key_rs[] = "\0rulesets"; knot_db_val_t key = { .data = key_rs, .len = sizeof(key_rs) }; knot_db_val_t rulesets = { .data = &RULESET_DEFAULT, .len = strlen(RULESET_DEFAULT) + 1 }; + ret = ruledb_op(remove, &key, 1); kr_assert(ret == 0 || ret == 1); ret = ruledb_op(write, &key, &rulesets, 1); if (ret == 0) return kr_ok(); failure: @@ -562,7 +565,7 @@ int local_data_ins(knot_db_val_t key, const knot_rrset_t *rrs, rdataset_dematerialize(sig_rds, data); knot_db_val_t val = { .data = buf, .len = val_len }; - int ret = ruledb_op(write, &key, &val, 1); + int ret = ruledb_op(write, &key, &val, 1); // TODO: overwriting on ==tags? // ENOSPC seems to be the only expectable error. kr_assert(ret == 0 || ret == kr_error(ENOSPC)); return ret; @@ -583,6 +586,7 @@ int kr_rule_local_data_merge(const knot_rrset_t *rrs, const kr_rule_tags_t tags) knot_db_val_t val; // Transaction: we assume that we're in a RW transaction already, // so that here we already "have a lock" on the last version. + // FIXME: iterate over multiple tags, once iterator supports RW TXN int ret = ruledb_op(read, &key, &val, 1); if (abs(ret) == abs(ENOENT)) goto fallback; @@ -806,7 +810,7 @@ int kr_rule_local_subtree(const knot_dname_t *apex, enum kr_rule_sub_t type, kr_require(data == buf + val_len); knot_db_val_t val = { .data = buf, .len = val_len }; - int ret = ruledb_op(write, &key, &val, 1); + int ret = ruledb_op(write, &key, &val, 1); // TODO: overwriting on ==tags? // ENOSPC seems to be the only expectable error. kr_assert(ret == 0 || ret == kr_error(ENOSPC)); return ret; @@ -898,11 +902,12 @@ int kr_view_insert_action(const char *subnet, const char *action) memcpy(key.data, RULESET_DEFAULT, rsp_len); } - // Insert & commit. + // Insert. We overwrite, as subnet is the only condition so far and that's in key. knot_db_val_t val = { .data = (void *)/*const-cast*/action, .len = strlen(action), }; + int ret = ruledb_op(remove, &key, 1); kr_assert(ret == 0 || ret == 1); return ruledb_op(write, &key, &val, 1); } diff --git a/lib/rules/api.h b/lib/rules/api.h index 7998560f3..52715c5cf 100644 --- a/lib/rules/api.h +++ b/lib/rules/api.h @@ -68,10 +68,11 @@ const uint32_t KR_RULE_TTL_DEFAULT; /* APIs to modify the rule DB. * * FIXME: + * - overwriting semantics; often even the API docs is wrong here ATM * - a way to read/modify a rule? */ -/** Insert/overwrite a local data rule. +/** Add a local data rule. * * Into the default rule-set ATM. * Special NODATA case: use a CNAME type with zero records (TTL matters). */ @@ -80,7 +81,8 @@ int kr_rule_local_data_ins(const knot_rrset_t *rrs, const knot_rdataset_t *sig_r kr_rule_tags_t tags); /** Merge RRs into a local data rule. * - * - If tags don't match, overwrite the data and return kr_error(EEXIST). + * - FIXME: with multiple tags variants for the same name-type pair, + * you typically end up with a single RR per RRset * - RRSIGs get dropped, if any were attached. * - We assume that this is called with a RW transaction open already, * which is always true in normal usage (long RW txn covering whole config). diff --git a/lib/rules/forward.c b/lib/rules/forward.c index f010603f8..12ad14d52 100644 --- a/lib/rules/forward.c +++ b/lib/rules/forward.c @@ -162,8 +162,12 @@ int kr_rule_forward(const knot_dname_t *apex, kr_rule_fwd_flags_t flags, } kr_require(data == buf + val_len); + // We don't allow combining forwarding rule with anything else + // on the same apex, including another forwarding rule (at least not yet). + int ret = ruledb_op(remove, &key, 1); + kr_assert(ret == 0 || ret == 1); knot_db_val_t val = { .data = buf, .len = val_len }; - int ret = ruledb_op(write, &key, &val, 1); + ret = ruledb_op(write, &key, &val, 1); // ENOSPC seems to be the only expectable error. kr_assert(ret == 0 || ret == kr_error(ENOSPC)); return ret;