From: Lukáš Ondráček <lukas.ondracek@nic.cz>
Date: Tue, 6 May 2025 12:10:04 +0000 (+0200)
Subject: daemon/session2: use ref_count only for deferred iter_ctx (fixes #927)
X-Git-Tag: v6.0.13~8^2~3
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=216f37a89e2d1135fa0397bf5874eb0bf2569397;p=thirdparty%2Fknot-resolver.git

daemon/session2: use ref_count only for deferred iter_ctx (fixes #927)

Partially reverts bb1babf0, where the memory-leak bug was introduced.
---

diff --git a/NEWS b/NEWS
index f8671710a..ef42b0c1e 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,7 @@ Bugfixes
 --------
 - fix `dnssec: false` (!1687)
 - fix undefined disable_defer (!1685)
+- daemon: fix a memory leak present since v6.0.9 (#927)
 
 
 Knot Resolver 6.0.12 (2025-04-24)
diff --git a/daemon/defer.c b/daemon/defer.c
index aeaded206..22a92238b 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -387,9 +387,7 @@ static inline struct protolayer_iter_ctx *pop_query(void)
 static inline void break_query(struct protolayer_iter_ctx *ctx, int err)
 {
 	if (ctx->session->stream) {
-		struct session2 *s = ctx->session;
 		struct pl_defer_sess_data *sdata = protolayer_sess_data_get_current(ctx);
-		s->ref_count++; // keep session and sdata alive for a while
 		waiting_requests_size -= sdata->size;
 		if (!ctx->session->closing) {
 			session2_force_close(ctx->session);
@@ -397,7 +395,7 @@ static inline void break_query(struct protolayer_iter_ctx *ctx, int err)
 		kr_assert(ctx == queue_head(sdata->queue));
 		while (true) {
 			queue_pop(sdata->queue);
-			if (ctx) {
+			if (ctx) {  // NULL can be queued to signal EOF
 				struct pl_defer_iter_data *idata = protolayer_iter_data_get_current(ctx);
 				waiting_requests_size -= idata->size;
 				protolayer_break(ctx, kr_error(err));
@@ -405,12 +403,12 @@ static inline void break_query(struct protolayer_iter_ctx *ctx, int err)
 			if (queue_len(sdata->queue) == 0) break;
 			ctx = queue_head(sdata->queue);
 		}
-		session2_unhandle(s); // decrease ref_count
 	} else {
 		struct pl_defer_iter_data *idata = protolayer_iter_data_get_current(ctx);
 		waiting_requests_size -= idata->size;
 		protolayer_break(ctx, kr_error(err));
 	}
+	session2_dec_refs(ctx->session);  // stream/datagram no more deferred
 	kr_assert(waiting_requests ? waiting_requests_size > 0 : waiting_requests_size == 0);
 }
 
@@ -478,6 +476,7 @@ static inline void process_single_deferred(void)
 		if (queue_len(sdata->queue) > 0) {
 			VERBOSE_LOG("    PUSH follow-up to head of %d\n", priority);
 			push_query(queue_head(sdata->queue), priority, true);
+			session2_inc_refs(session);  // still deferred
 		} else {
 			waiting_requests_size -= sdata->size;
 		}
@@ -486,20 +485,14 @@ static inline void process_single_deferred(void)
 	waiting_requests_size -= idata->size;
 	kr_assert(waiting_requests ? waiting_requests_size > 0 : waiting_requests_size == 0);
 
-	if (eof) {
-		// Keep session alive even if it is somehow force-closed during continuation.
-		// TODO Is it possible?
-		session->ref_count++;
-	}
-
 	VERBOSE_LOG("    CONTINUE\n");
 	protolayer_continue(ctx);
 
 	if (eof) {
 		VERBOSE_LOG("    CONTINUE EOF event\n");
 		session2_event_after(session, PROTOLAYER_TYPE_DEFER, PROTOLAYER_EVENT_EOF, NULL);
-		session2_unhandle(session); // decrease ref_count
 	}
+	session2_dec_refs(session); // no more deferred or incremented above
 }
 
 /// Process as many deferred requests as needed to get memory consumption under limit.
@@ -582,6 +575,7 @@ static enum protolayer_iter_cb_result pl_defer_unwrap(
 	push_query(ctx, priority, false);
 	waiting_requests_size += idata->size = protolayer_iter_size_est(ctx, !ctx->session->stream);
 		// for stream, payload is counted in session wire buffer
+	session2_inc_refs(ctx->session);  // keep session alive while deferred (1 per stream/datagram)
 
 	process_deferred_over_size_limit();
 	return protolayer_async();
diff --git a/daemon/io.c b/daemon/io.c
index 8093a4b0d..3f0f1065e 100644
--- a/daemon/io.c
+++ b/daemon/io.c
@@ -955,13 +955,13 @@ static void io_deinit(uv_handle_t *handle)
 		return;
 	}
 	if (handle->type != UV_POLL) {
-		session2_unhandle(handle->data);
+		session2_dec_refs(handle->data);
 	} else {
 	#if ENABLE_XDP
 		xdp_handle_data_t *xhd = handle->data;
 		uv_idle_stop(&xhd->tx_waker);
 		uv_close((uv_handle_t *)&xhd->tx_waker, NULL);
-		session2_unhandle(xhd->session);
+		session2_dec_refs(xhd->session);
 		knot_xdp_deinit(xhd->socket);
 		queue_deinit(xhd->tx_waker_queue);
 		free(xhd);
diff --git a/daemon/session2.c b/daemon/session2.c
index 2a1fe6881..fc7822e8c 100644
--- a/daemon/session2.c
+++ b/daemon/session2.c
@@ -449,7 +449,6 @@ static int protolayer_iter_ctx_finish(struct protolayer_iter_ctx *ctx, int ret)
 
 	mm_ctx_delete(&ctx->pool);
 	free(ctx);
-	session2_unhandle(s);
 
 	return ret;
 }
@@ -616,8 +615,6 @@ static int session2_submit(
 {
 	if (session->closing)
 		return kr_error(ECANCELED);
-	if (session->ref_count >= INT_MAX - 1)
-		return kr_error(ETOOMANYREFS);
 	if (kr_fails_assert(session->proto < KR_PROTO_COUNT))
 		return kr_error(EFAULT);
 
@@ -651,7 +648,6 @@ static int session2_submit(
 		.finished_cb = cb,
 		.finished_cb_baton = baton
 	};
-	session->ref_count++;
 	if (had_comm_param) {
 		struct comm_addr_storage *addrst = &ctx->comm_addr_storage;
 		if (comm->src_addr) {
@@ -877,7 +873,7 @@ struct session2 *session2_new(enum session2_transport_type transport_type,
 	ret = uv_timer_init(uv_default_loop(), &s->timer);
 	kr_require(!ret);
 	s->timer.data = s;
-	s->ref_count++; /* Session owns the timer */
+	session2_inc_refs(s); /* Session owns the timer */
 
 	/* Initialize the layer's session data */
 	for (size_t i = 0; i < grp->num_layers; i++) {
@@ -917,7 +913,13 @@ static void session2_free(struct session2 *s)
 	free(s);
 }
 
-void session2_unhandle(struct session2 *s)
+void session2_inc_refs(struct session2 *s)
+{
+	kr_assert(s->ref_count < INT_MAX);
+	s->ref_count++;
+}
+
+void session2_dec_refs(struct session2 *s)
 {
 	if (kr_fails_assert(s->ref_count > 0)) {
 		session2_free(s);
@@ -1709,7 +1711,7 @@ static void on_session2_handle_close(uv_handle_t *handle)
 
 static void on_session2_timer_close(uv_handle_t *handle)
 {
-	session2_unhandle(handle->data);
+	session2_dec_refs(handle->data);
 }
 
 static int session2_handle_close(struct session2 *s)
@@ -1724,8 +1726,8 @@ static int session2_handle_close(struct session2 *s)
 		 * been ended. We do not `uv_close` the handles, we just free
 		 * up the memory. */
 
-		session2_unhandle(s); /* For timer handle */
-		io_free(handle); /* This will unhandle the transport handle */
+		session2_dec_refs(s); /* For timer handle */
+		io_free(handle); /* This will decrement refs for the transport handle */
 		return kr_ok();
 	}
 
diff --git a/daemon/session2.h b/daemon/session2.h
index f0f1868b7..897c31b92 100644
--- a/daemon/session2.h
+++ b/daemon/session2.h
@@ -843,7 +843,7 @@ struct session2 {
 	uint32_t log_id; /**< Session ID for logging. */
 
 	int ref_count; /**< Number of unclosed libUV handles owned by this
-	               * session + iteration contexts referencing the session. */
+	               * session + 1 per deferred datagram/stream. */
 
 	/** Communication information. Typically written into by one of the
 	 * first layers facilitating transport protocol processing.
@@ -934,6 +934,11 @@ struct session2 *session2_new(enum session2_transport_type transport_type,
                               size_t layer_param_count,
                               bool outgoing);
 
+/** Used for counting references from unclosed libUV handles owned by the session and from defer.
+ * Once all owned handles are closed and nothing is deferred, the session is freed. */
+void session2_inc_refs(struct session2 *s);
+void session2_dec_refs(struct session2 *s);
+
 /** Allocates and initializes a new session with the specified protocol layer
  * group, using a *libuv handle* as its transport. */
 static inline struct session2 *session2_new_io(uv_handle_t *handle,
@@ -946,7 +951,7 @@ static inline struct session2 *session2_new_io(uv_handle_t *handle,
 			layer_param, layer_param_count, outgoing);
 	s->transport.io.handle = handle;
 	handle->data = s;
-	s->ref_count++; /* Session owns the handle */
+	session2_inc_refs(s); /* Session owns the handle */
 	return s;
 }
 
@@ -964,10 +969,6 @@ static inline struct session2 *session2_new_child(struct session2 *parent,
 	return s;
 }
 
-/** Used when a libUV handle owned by the session is closed. Once all owned
- * handles are closed, the session is freed. */
-void session2_unhandle(struct session2 *s);
-
 /** Start reading from the underlying transport. */
 int session2_start_read(struct session2 *session);