From: Aram Sargsyan Date: Wed, 4 Mar 2026 16:25:33 +0000 (+0000) Subject: Fix a race condition in xfrin_recv_done() when calling xfrin_reset() X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=141ff7bfa7bf97b5d2b55a8417c847ac81ca5bc1;p=thirdparty%2Fbind9.git Fix a race condition in xfrin_recv_done() when calling xfrin_reset() When the xfrin_recv_done() function decides to retry the transfer using AXFR because of a previous error, it calls the xfrin_reset() function which calls dns_db_closeversion() on 'xfr->ver'. The problem is that the ixfr processing of a previous message could be still in process in a worker thread, which then can use freed 'xfr->ver'. If there is an ongoing worker thread delay the AXFR retry until after the worker thread has finished its work. --- diff --git a/lib/dns/xfrin.c b/lib/dns/xfrin.c index 20bd836ef43..eca48700a92 100644 --- a/lib/dns/xfrin.c +++ b/lib/dns/xfrin.c @@ -141,7 +141,7 @@ struct dns_xfrin { _Atomic xfrin_state_t state; uint32_t expireopt; - bool edns, expireoptset; + bool edns, expireoptset, retry_axfr; atomic_bool is_ixfr; /* @@ -270,6 +270,10 @@ xfrin_idledout(void *); static void xfrin_minratecheck(void *); static void +xfrin_reset(dns_xfrin_t *xfr); +static void +xfrin_ixfrcleanup(dns_xfrin_t *xfr); +static void xfrin_fail(dns_xfrin_t *xfr, isc_result_t result, const char *msg); static isc_result_t render(dns_message_t *msg, isc_mem_t *mctx, isc_buffer_t *buf); @@ -627,7 +631,9 @@ ixfr_apply_done(void *arg) { CHECK(result); /* Reschedule */ - if (!cds_wfcq_empty(&xfr->diff_head, &xfr->diff_tail)) { + if (!xfr->retry_axfr && + !cds_wfcq_empty(&xfr->diff_head, &xfr->diff_tail)) + { isc_work_enqueue(xfr->loop, ixfr_apply, ixfr_apply_done, work); return; } @@ -637,7 +643,18 @@ cleanup: isc_mem_put(xfr->mctx, work, sizeof(*work)); - if (result == ISC_R_SUCCESS) { + /* + * Don't retry with AXFR (even if it was requested) because there was + * an error or the transfer is shutting down. In case if it _was_ an + * error, xfrin_fail() will return a special result code which will + * still result in AXFR retry from the initiator of the transfer after + * the failure has been is logged. + */ + if (result != ISC_R_SUCCESS) { + xfr->retry_axfr = false; + } + + if (!xfr->retry_axfr && result == ISC_R_SUCCESS) { dns_db_closeversion(xfr->db, &xfr->ver, true); dns_zone_markdirty(xfr->zone); @@ -647,7 +664,21 @@ cleanup: } else { dns_db_closeversion(xfr->db, &xfr->ver, false); - xfrin_fail(xfr, result, "failed while processing responses"); + if (result != ISC_R_SUCCESS) { + xfrin_fail(xfr, result, + "failed while processing responses"); + } + } + + if (xfr->retry_axfr) { + xfr->reqtype = dns_rdatatype_soa; + atomic_store(&xfr->state, XFRST_SOAQUERY); + + xfrin_reset(xfr); + result = xfrin_start(xfr); + if (result != ISC_R_SUCCESS) { + xfrin_fail(xfr, result, "failed setting up socket"); + } } dns_xfrin_detach(&xfr); @@ -1175,13 +1206,18 @@ xfrin_cancelio(dns_xfrin_t *xfr) { static void xfrin_reset(dns_xfrin_t *xfr) { REQUIRE(VALID_XFRIN(xfr)); + REQUIRE(!xfr->diff_running); xfrin_log(xfr, ISC_LOG_INFO, "resetting"); + xfr->retry_axfr = false; + if (xfr->lasttsig != NULL) { isc_buffer_free(&xfr->lasttsig); } + xfrin_ixfrcleanup(xfr); + dns_diff_clear(&xfr->diff); xfr->ixfr.diffs = 0; @@ -1844,6 +1880,11 @@ xfrin_recv_done(isc_result_t result, isc_region_t *region, void *arg) { { xfr->edns = false; dns_message_detach(&msg); + /* + * With these states (see the conditions above) the diff + * process can't be currently in the running state, so + * it is safe to reset the 'xfr' and retry right away. + */ xfrin_reset(xfr); goto try_again; } else if (result == ISC_R_SUCCESS && @@ -1873,6 +1914,12 @@ xfrin_recv_done(isc_result_t result, isc_region_t *region, void *arg) { try_axfr: LIBDNS_XFRIN_RECV_TRY_AXFR(xfr, xfr->info, result); dns_message_detach(&msg); + /* If there is a running worker thread then delay the retry. */ + if (xfr->diff_running) { + xfr->retry_axfr = true; + dns_xfrin_detach(&xfr); + return; + } xfrin_reset(xfr); xfr->reqtype = dns_rdatatype_soa; atomic_store(&xfr->state, XFRST_SOAQUERY); @@ -2070,6 +2117,19 @@ cleanup: dns_xfrin_detach(&xfr); } +static void +xfrin_ixfrcleanup(dns_xfrin_t *xfr) { + struct cds_wfcq_node *node, *next; + __cds_wfcq_for_each_blocking_safe(&xfr->diff_head, &xfr->diff_tail, + node, next) { + ixfr_apply_data_t *data = + caa_container_of(node, ixfr_apply_data_t, wfcq_node); + /* We need to clear and free all data chunks */ + dns_diff_clear(&data->diff); + isc_mem_put(xfr->mctx, data, sizeof(*data)); + } +} + static void xfrin_destroy(dns_xfrin_t *xfr) { uint64_t msecs, persec; @@ -2120,15 +2180,7 @@ xfrin_destroy(dns_xfrin_t *xfr) { sep, expireopt); /* Cleanup unprocessed IXFR data */ - struct cds_wfcq_node *node, *next; - __cds_wfcq_for_each_blocking_safe(&xfr->diff_head, &xfr->diff_tail, - node, next) { - ixfr_apply_data_t *data = - caa_container_of(node, ixfr_apply_data_t, wfcq_node); - /* We need to clear and free all data chunks */ - dns_diff_clear(&data->diff); - isc_mem_put(xfr->mctx, data, sizeof(*data)); - } + xfrin_ixfrcleanup(xfr); /* Cleanup unprocessed AXFR data */ dns_diff_clear(&xfr->diff);