cassandra: Fallback consistency fix - it wasn't used permanently

author Timo Sirainen <timo.sirainen@dovecot.fi>

Mon, 27 Mar 2017 10:44:13 +0000 (13:44 +0300)

committer Timo Sirainen <timo.sirainen@dovecot.fi>

Mon, 27 Mar 2017 10:55:33 +0000 (13:55 +0300)
author Timo Sirainen <timo.sirainen@dovecot.fi>
Mon, 27 Mar 2017 10:44:13 +0000 (13:44 +0300)
committer Timo Sirainen <timo.sirainen@dovecot.fi>
Mon, 27 Mar 2017 10:55:33 +0000 (13:55 +0300)
diff --git a/src/lib-sql/driver-cassandra.c b/src/lib-sql/driver-cassandra.c

index d7fc8230e214123acc0c2ff02e04bbc87ee58404..9f1aca80f15c1230ea0bb60343f937fb94c21d7b 100644 (file)
--- a/src/lib-sql/driver-cassandra.c
+++ b/src/lib-sql/driver-cassandra.c
@@ -102,7 +102,7 @@ struct cassandra_db {
         struct timeout *to_metrics;
         uint64_t counters[CASSANDRA_COUNTER_COUNT];
  
-       struct timeval first_fallback_sent[CASSANDRA_QUERY_TYPE_COUNT];
+       struct timeval primary_query_last_sent[CASSANDRA_QUERY_TYPE_COUNT];
         time_t last_fallback_warning[CASSANDRA_QUERY_TYPE_COUNT];
         unsigned int fallback_failures[CASSANDRA_QUERY_TYPE_COUNT];
  
@@ -782,8 +782,7 @@ static void query_resend_with_fallback(struct cassandra_result *result)
                 db->last_fallback_warning[result->query_type] = ioloop_time;
         }
         i_free_and_null(result->error);
-       if (db->fallback_failures[result->query_type]++ == 0)
-               db->first_fallback_sent[result->query_type] = ioloop_timeval;
+       db->fallback_failures[result->query_type]++;
  
         result->consistency = result->fallback_consistency;
         driver_cassandra_result_send_query(result);
@@ -899,7 +898,7 @@ driver_cassandra_want_fallback_query(struct cassandra_result *result)
  
         if (failure_count == 0)
                 return FALSE;
-       tv = db->first_fallback_sent[result->query_type];
+       /* double the retry delay for every additional failure. */
         for (i = 1; i < failure_count; i++) {
                 msecs *= 2;
                 if (msecs >= CASSANDRA_FALLBACK_MAX_RETRY_MSECS) {
@@ -907,6 +906,19 @@ driver_cassandra_want_fallback_query(struct cassandra_result *result)
                         break;
                 }
         }
+       /* If last primary query sent timestamp + msecs is older than current
+          time, we need to retry the primary query. Note that this practically
+          prevents multiple primary queries from being attempted
+          simultaneously, because the caller updates primary_query_last_sent
+          immediately when returning.
+
+          The only time when multiple primary queries can be running in
+          parallel is when the earlier query is being slow and hasn't finished
+          early enough. This could even be a wanted feature, since while the
+          first query might have to wait for a timeout, Cassandra could have
+          been fixed in the meantime and the second query finishes
+          successfully. */
+       tv = db->primary_query_last_sent[result->query_type];
         timeval_add_msecs(&tv, msecs);
         return timeval_cmp(&ioloop_timeval, &tv) < 0;
  }
@@ -943,6 +955,8 @@ static int driver_cassandra_send_query(struct cassandra_result *result)
  
         if (driver_cassandra_want_fallback_query(result))
                 result->consistency = result->fallback_consistency;
+       else
+               db->primary_query_last_sent[result->query_type] = ioloop_timeval;
  
         driver_cassandra_result_send_query(result);
         result->query_sent = TRUE;
author	Timo Sirainen <timo.sirainen@dovecot.fi>
	Mon, 27 Mar 2017 10:44:13 +0000 (13:44 +0300)
committer	Timo Sirainen <timo.sirainen@dovecot.fi>
	Mon, 27 Mar 2017 10:55:33 +0000 (13:55 +0300)