From: Andrew Tridgell <tridge@samba.org>
Date: Thu, 18 Oct 2007 05:44:02 +0000 (+1000)
Subject: merge from ronnie
X-Git-Tag: tevent-0.9.20~348^2~2374
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d939a2901b8aa3128909cdd46b0cc1a0669d9a56;p=thirdparty%2Fsamba.git

merge from ronnie
(This used to be ctdb commit 75d4b386293e186a6bb8532515585ab72670d663)
---

d939a2901b8aa3128909cdd46b0cc1a0669d9a56
diff --cc ctdb/server/ctdb_recover.c
index 85ca2f0dcce,03e1db3f955..644f0b3c9c5
--- a/ctdb/server/ctdb_recover.c
+++ b/ctdb/server/ctdb_recover.c
@@@ -422,6 -427,78 +427,78 @@@ static void ctdb_recovered_callback(str
  	talloc_free(state);
  }
  
+ /*
+   called if our set_recmode child times out. this would happen if
+   ctdb_recovery_lock() would block. sends an error reply and frees the state.
+  */
+ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_event *te, 
+ 					 struct timeval t, void *private_data)
+ {
+ 	struct ctdb_set_recmode_state *state = talloc_get_type(private_data, 
+ 					   struct ctdb_set_recmode_state);
+ 
+ 	ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_set_recmode");
+ 	talloc_free(state);
+ }
+ 
+ 
+ /* when we free the recmode state we must kill any child process and reap
+    it with waitpid(). presumably run as the state's talloc destructor. */
+ static int set_recmode_destructor(struct ctdb_set_recmode_state *state)
+ {
+ 	kill(state->child, SIGKILL);
+ 	waitpid(state->child, NULL, 0);
+ 	return 0;
+ }
+ 
+ /* this is called when the child process has completed ctdb_recovery_lock()
+    and has written data back to us through the pipe.
+ */
+ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde, 
+ 			     uint16_t flags, void *private_data)
+ {
+ 	struct ctdb_set_recmode_state *state= talloc_get_type(private_data, 
+ 					     struct ctdb_set_recmode_state);
 -	char c;
++	char c = 0;
+ 	int ret;
+ 
+ 	/* we got a response from our child process so we can abort the
+ 	   timeout.
+ 	*/
+ 	talloc_free(state->te);
+ 	state->te = NULL;
+ 
+ 
+ 	/* read the child's status from trying to lock the reclock file.
+ 	   the child wrote 0 if everything is fine and 1 if it did manage
+ 	   to lock the file, which would be a problem: it means we got a
+ 	   request to exit from recovery but could still lock the file, which
+ 	   at this time SHOULD be locked by the recovery daemon on the
+ 	   recmaster. a failed or short read is treated like a non-zero status.
+ 	*/		
 -	read(state->fd[0], &c, 1);
 -	if (c != 0) {
++	ret = read(state->fd[0], &c, 1);
++	if (ret != 1 || c != 0) {
+ 		ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "managed to lock reclock file from inside daemon");
+ 		talloc_free(state);
+ 		return;
+ 	}
+ 
+ 
+ 	ctdb_stop_monitoring(state->ctdb);
+ 
+ 	/* call the events script to tell all subsystems that we have recovered */
+ 	ret = ctdb_event_script_callback(state->ctdb, 
+ 					 timeval_current_ofs(state->ctdb->tunable.script_timeout, 0),
+ 					 state, 
+ 					 ctdb_recovered_callback, 
+ 					 state, "recovered");
+ 	if (ret != 0) {
+ 		ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "failed to run eventscript from set_recmode");
+ 		talloc_free(state);
+ 		return;
+ 	}
+ }
+ 
  /*
    set the recovery mode
   */