From 32d6cc84cf8e0cf278b5715b8a9d66b7c0a2a6d2 Mon Sep 17 00:00:00 2001
From: Volker Lendecke
Date: Mon, 30 Sep 2019 11:39:11 +0200
Subject: [PATCH] dbwrap_watch: Don't alert ourselves, fix raw.oplock.batch26
 race

This fixes the following flaky test:

UNEXPECTED(failure): samba3.raw.oplock.batch26(nt4_dc)
REASON: Exception: Exception: (../../source4/torture/raw/oplock.c:3718): wrong value for break_info.count got 0x2 - should be 0x1

You can reproduce it with two small msleeps, which means it's a race
condition:

diff --git a/source3/smbd/open.c b/source3/smbd/open.c
index 20b5a3e294c..126c7fc021d 100644
--- a/source3/smbd/open.c
+++ b/source3/smbd/open.c
@@ -1917,6 +1917,14 @@ NTSTATUS send_break_message(struct messaging_context *msg_ctx,
 	DATA_BLOB blob;
 	NTSTATUS status;
 
+	{
+		static bool sent = false;
+		if (sent) {
+			smb_msleep(500);
+		}
+		sent = true;
+	}
+
 	if (DEBUGLVL(10)) {
 		struct server_id_buf buf;
 		DBG_DEBUG("Sending break message to %s\n",
diff --git a/source3/smbd/oplock.c b/source3/smbd/oplock.c
index b3da84b1269..d9c4dbb9487 100644
--- a/source3/smbd/oplock.c
+++ b/source3/smbd/oplock.c
@@ -858,6 +858,8 @@ static void process_oplock_break_message(struct messaging_context *msg_ctx,
 	uint16_t break_to;
 	bool break_needed = true;
 
+	smb_msleep(100);
+
 	msg = talloc(talloc_tos(), struct oplock_break_message);
 	if (msg == NULL) {
 		DBG_WARNING("talloc failed\n");

15a8af075a2 introduced a bug where we immediately wake up ourselves
after doing a watch_send, leading to two inter-smbd oplock break
messages for this case. In theory, this should not matter, as in the
oplock break handler in the destination smbd we check
(fsp->sent_oplock_break != NO_BREAK_SENT) so that the break does not
get sent twice. However, with the above two sleeps the oplock holding
client could send out its oplock downgrade while the second
inter-smbd break message was on its way.

The real fix would be to note in the share mode array that the
inter-smbd message has already been sent, but as other users of
dbwrap_watched_watch_send might also be affected by this bug, this
fix should be sufficient to get rid of this flaky test.

Unfortunately, dbwrap_watch.c is now pretty complex and needs some
serious refactoring to become understandable again. But that's
something for another day, sorry.

Signed-off-by: Volker Lendecke
Reviewed-by: Ralph Boehme
---
 source3/lib/dbwrap/dbwrap_watch.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_watch.c b/source3/lib/dbwrap/dbwrap_watch.c
index c7e62dfc4bf..36e445a4fd3 100644
--- a/source3/lib/dbwrap/dbwrap_watch.c
+++ b/source3/lib/dbwrap/dbwrap_watch.c
@@ -196,6 +196,7 @@ struct db_watched_ctx {
 struct db_watched_subrec {
 	struct db_record *subrec;
 	struct dbwrap_watch_rec wrec;
+	bool added_watcher;
 };
 
 static NTSTATUS dbwrap_watched_subrec_storev(
@@ -385,7 +386,7 @@ static void dbwrap_watched_subrec_wakeup(
 	struct db_context *db = rec->db;
 	struct db_watched_ctx *ctx = talloc_get_type_abort(
 		db->private_data, struct db_watched_ctx);
-	size_t i;
+	size_t i, num_to_wakeup;
 	size_t db_id_len = dbwrap_db_id(db, NULL, 0);
 	uint8_t db_id[db_id_len];
 	uint8_t len_buf[4];
@@ -403,7 +404,17 @@ static void dbwrap_watched_subrec_wakeup(
 
 	i = 0;
 
-	while (i < wrec->num_watchers) {
+	num_to_wakeup = wrec->num_watchers;
+
+	if (subrec->added_watcher) {
+		/*
+		 * Don't alert our own watcher that we just added to
+		 * the end of the array.
+		 */
+		num_to_wakeup -= 1;
+	}
+
+	while (i < num_to_wakeup) {
 		struct server_id watcher;
 		NTSTATUS status;
 		struct server_id_buf tmp;
@@ -916,6 +927,7 @@ struct tevent_req *dbwrap_watched_watch_send(TALLOC_CTX *mem_ctx,
 					     state->me);
 	wrec->watchers = watchers;
 	wrec->num_watchers += 1;
+	subrec->added_watcher = true;
 
 	status = dbwrap_watched_save(
 		subrec->subrec, wrec, &wrec->data, 1, 0);
-- 
2.47.3
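
Below is a minimal, self-contained sketch of the idea behind the change,
for readers without the Samba tree at hand. It is not Samba code: struct
watch_list, add_watcher() and wakeup_watchers() are made-up stand-ins for
dbwrap's watcher array, dbwrap_watched_watch_send() and
dbwrap_watched_subrec_wakeup(). It only illustrates the point the patch
makes: after appending ourselves to the watcher array, skip the last
entry when waking watchers, so we don't immediately alert ourselves.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_WATCHERS 8

/*
 * Simplified stand-in for the per-record watcher array that
 * dbwrap_watch keeps; Samba stores struct server_id, plain ints
 * are enough for the sketch.
 */
struct watch_list {
	int watchers[MAX_WATCHERS];
	size_t num_watchers;
	bool added_watcher;	/* we appended ourselves last */
};

/* Roughly the watch_send path: register "self" as a new watcher. */
static void add_watcher(struct watch_list *w, int self)
{
	if (w->num_watchers < MAX_WATCHERS) {
		w->watchers[w->num_watchers] = self;
		w->num_watchers += 1;
		w->added_watcher = true;
	}
}

/*
 * Roughly the wakeup path: notify everyone who was already watching,
 * but skip the watcher we just appended to the end of the array --
 * waking ourselves here is what produced the second inter-smbd
 * oplock break message.
 */
static void wakeup_watchers(const struct watch_list *w)
{
	size_t num_to_wakeup = w->num_watchers;
	size_t i;

	if (w->added_watcher) {
		num_to_wakeup -= 1;
	}

	for (i = 0; i < num_to_wakeup; i++) {
		printf("waking watcher %d\n", w->watchers[i]);
	}
}

int main(void)
{
	struct watch_list w = { .num_watchers = 0 };

	w.watchers[w.num_watchers++] = 1;	/* a pre-existing watcher */
	add_watcher(&w, 2);			/* ourselves */
	wakeup_watchers(&w);			/* wakes only watcher 1 */

	return 0;
}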