a better way to resend calls after recovery

author Andrew Tridgell <tridge@samba.org>

Fri, 18 May 2007 14:56:49 +0000 (00:56 +1000)

committer Andrew Tridgell <tridge@samba.org>

Fri, 18 May 2007 14:56:49 +0000 (00:56 +1000)
author Andrew Tridgell <tridge@samba.org>
Fri, 18 May 2007 14:56:49 +0000 (00:56 +1000)
committer Andrew Tridgell <tridge@samba.org>
Fri, 18 May 2007 14:56:49 +0000 (00:56 +1000)
diff --git a/ctdb/common/ctdb_call.c b/ctdb/common/ctdb_call.c

index cd7244ff15785efd356c8505c3c5b763c59b5974..c19d88f660effd79e35b68f6414ef61af8686035 100644 (file)
--- a/ctdb/common/ctdb_call.c
+++ b/ctdb/common/ctdb_call.c
@@ -607,37 +607,20 @@ void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
  */
  static int ctdb_call_destructor(struct ctdb_call_state *state)
  {
+       DLIST_REMOVE(state->ctdb_db->ctdb->pending_calls, state);
         ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
         return 0;
  }
  
  
  /*
-  called when a ctdb_call times out
+  called when a ctdb_call needs to be resent after a reconfigure event
  */
-static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te, 
-                             struct timeval t, void *private_data)
+static void ctdb_call_resend(struct ctdb_call_state *state)
  {
-       struct ctdb_call_state *state = talloc_get_type(private_data, struct ctdb_call_state);
         struct ctdb_context *ctdb = state->ctdb_db->ctdb;
  
-       ctdb->status.timeouts.call++;
-
-       event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0), 
-                       ctdb_call_timeout, state);
-
-       if (++state->resend_count < 10 &&
-           (ctdb->vnn_map->generation == state->generation ||
-            ctdb->recovery_mode != CTDB_RECOVERY_NORMAL)) {
-               /* the call is just being slow, or we are curently
-                  recovering, give it more time */
-               return;
-       }
-
-       /* the generation count changed or we're timing out too much -
-          the call must be re-issued */
         state->generation = ctdb->vnn_map->generation;
-       state->resend_count = 0;
  
         /* use a new reqid, in case the old reply does eventually come in */
         ctdb_reqid_remove(ctdb, state->reqid);
@@ -651,7 +634,19 @@ static void ctdb_call_timeout(struct event_context *ev, struct timed_event *te,
         state->c->hdr.destnode = ctdb->vnn;
  
         ctdb_queue_packet(ctdb, &state->c->hdr);
-       DEBUG(0,("requeued ctdb_call after timeout\n"));
+       DEBUG(0,("resent ctdb_call\n"));
+}
+
+/*
+  resend all pending calls on recovery (invoked from ctdb_control_thaw)
+ */
+void ctdb_call_resend_all(struct ctdb_context *ctdb)
+{
+       struct ctdb_call_state *state, *next;
+       for (state=ctdb->pending_calls;state;state=next) {
+               next = state->next;
+               ctdb_call_resend(state);
+       }
  }
  
  /*
@@ -743,10 +738,10 @@ struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctd
         state->state  = CTDB_CALL_WAIT;
         state->generation = ctdb->vnn_map->generation;
  
+       DLIST_ADD(ctdb->pending_calls, state);
+
         ctdb_queue_packet(ctdb, &state->c->hdr);
  
-       event_add_timed(ctdb->ev, state, timeval_current_ofs(CTDB_CALL_TIMEOUT, 0), 
-                       ctdb_call_timeout, state);
         return state;
  }
  
diff --git a/ctdb/common/ctdb_freeze.c b/ctdb/common/ctdb_freeze.c

index 96a128332e43ca1b58993220a473fd519a870039..5868ed099c942639a2a0bb6e7e44efb4e47763eb 100644 (file)
--- a/ctdb/common/ctdb_freeze.c
+++ b/ctdb/common/ctdb_freeze.c
@@ -223,5 +223,6 @@ int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
  {
         talloc_free(ctdb->freeze_handle);
         ctdb->freeze_handle = NULL;
+       ctdb_call_resend_all(ctdb);
         return 0;
  }
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h

index 57901ed6a99ac4035850c83755e29c8e6379527b..0149714c8566d2885c8d95d6b4cb4d01b70321d4 100644 (file)
--- a/ctdb/include/ctdb_private.h
+++ b/ctdb/include/ctdb_private.h
@@ -265,6 +265,7 @@ struct ctdb_context {
         uint32_t num_clients;
         uint32_t seqnum_frequency;
         uint32_t recovery_master;
+       struct ctdb_call_state *pending_calls;
  };
  
  struct ctdb_db_context {
@@ -300,11 +301,6 @@ struct ctdb_db_context {
            ctdb_fatal(ctdb, "Out of memory in " __location__ ); \
           }} while (0)
  
-/* timeout for ctdb call operations. When this timeout expires we
-   check if the generation count has changed, and if it has then
-   re-issue the call */
-#define CTDB_CALL_TIMEOUT 2
-
  /* maximum timeout for ctdb control calls */
  #define CTDB_CONTROL_TIMEOUT 60
  
@@ -390,6 +386,7 @@ enum call_state {CTDB_CALL_WAIT, CTDB_CALL_DONE, CTDB_CALL_ERROR};
    state of a in-progress ctdb call
  */
  struct ctdb_call_state {
+       struct ctdb_call_state *next, *prev;
         enum call_state state;
         uint32_t reqid;
         struct ctdb_req_call *c;
@@ -397,7 +394,6 @@ struct ctdb_call_state {
         const char *errmsg;
         struct ctdb_call call;
         uint32_t generation;
-       uint32_t resend_count;
         struct {
                 void (*fn)(struct ctdb_call_state *);
                 void *private_data;
@@ -828,5 +824,6 @@ int ctdb_start_monitoring(struct ctdb_context *ctdb);
  void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
  
  void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
+void ctdb_call_resend_all(struct ctdb_context *ctdb);
  
  #endif
author	Andrew Tridgell <tridge@samba.org>
	Fri, 18 May 2007 14:56:49 +0000 (00:56 +1000)
committer	Andrew Tridgell <tridge@samba.org>
	Fri, 18 May 2007 14:56:49 +0000 (00:56 +1000)
ctdb/common/ctdb_call.c		patch \| blob \| blame \| history
ctdb/common/ctdb_freeze.c		patch \| blob \| blame \| history
ctdb/include/ctdb_private.h		patch \| blob \| blame \| history