From 55bb4dab7a4f132537b5ed93e504ffc618b643b9 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Fri, 22 May 2020 15:06:20 -0700 Subject: [PATCH] Some checksum improvements - Improve csum negotation logic. - Define the csum names in a single structure. - Add --debug=CSUM. --- checksum.c | 167 +++++++++++++++++++++++++++++++++++++++++++---------- compat.c | 21 +++---- flist.c | 2 +- io.c | 2 +- options.c | 3 +- rsync.h | 3 +- 6 files changed, 152 insertions(+), 46 deletions(-) diff --git a/checksum.c b/checksum.c index 54e2c4aa..99c29d98 100644 --- a/checksum.c +++ b/checksum.c @@ -37,8 +37,17 @@ extern char *checksum_choice; #define CSUM_MD4 4 #define CSUM_MD5 5 -const char *default_checksum_list = - "md5 md4"; +#define CSUM_SAW_BUFLEN 10 + +struct csum_struct { + int num; + const char *name; +} valid_checksums[] = { + { CSUM_MD5, "md5" }, + { CSUM_MD4, "md4" }, + { CSUM_NONE, "none" }, + { -1, NULL } +}; #define MAX_CHECKSUM_LIST 1024 @@ -48,6 +57,8 @@ const char *negotiated_csum_name = NULL; static int parse_csum_name(const char *name, int len, int allow_auto) { + struct csum_struct *cs; + if (len < 0 && name) len = strlen(name); @@ -60,12 +71,11 @@ static int parse_csum_name(const char *name, int len, int allow_auto) return CSUM_MD4_BUSTED; return CSUM_MD4_ARCHAIC; } - if (len == 3 && strncasecmp(name, "md4", 3) == 0) - return CSUM_MD4; - if (len == 3 && strncasecmp(name, "md5", 3) == 0) - return CSUM_MD5; - if (len == 4 && strncasecmp(name, "none", 4) == 0) - return CSUM_NONE; + + for (cs = valid_checksums; cs->name; cs++) { + if (strncasecmp(name, cs->name, len) == 0 && cs->name[len] == '\0') + return cs->num; + } if (allow_auto) { rprintf(FERROR, "unknown checksum name: %s\n", name); @@ -75,7 +85,22 @@ static int parse_csum_name(const char *name, int len, int allow_auto) return -1; } -void parse_checksum_choice(void) +static const char *checksum_name(int num) +{ + struct csum_struct *cs; + + for (cs = valid_checksums; cs->name; cs++) { + if (num == cs->num) + return cs->name; + } + + if (num < CSUM_MD4) + return "MD4"; + + return "UNKNOWN"; +} + +void parse_checksum_choice(int final_call) { if (!negotiated_csum_name) { char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL; @@ -85,46 +110,124 @@ void parse_checksum_choice(void) } else xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1, 1); } + if (xfersum_type == CSUM_NONE) whole_file = 1; + + if (final_call && DEBUG_GTE(CSUM, 1)) { + if (negotiated_csum_name) + rprintf(FINFO, "[%s] negotiated checksum: %s\n", who_am_i(), negotiated_csum_name); + else if (xfersum_type == checksum_type) { + rprintf(FINFO, "[%s] %s checksum: %s\n", who_am_i(), + checksum_choice ? "chosen" : "protocol-based", + checksum_name(xfersum_type)); + } else { + rprintf(FINFO, "[%s] chosen transfer checksum: %s\n", + who_am_i(), checksum_name(xfersum_type)); + rprintf(FINFO, "[%s] chosen pre-transfer checksum: %s\n", + who_am_i(), checksum_name(checksum_type)); + } + } +} + +static int parse_checksum_list(const char *from, char *sumbuf, int sumbuf_len, char *saw) +{ + char *to = sumbuf, *tok = NULL; + int cnt = 0; + + memset(saw, 0, CSUM_SAW_BUFLEN); + + while (1) { + if (*from == ' ' || !*from) { + if (tok) { + int sum_type = parse_csum_name(tok, to - tok, 0); + if (sum_type >= 0 && !saw[sum_type]) + saw[sum_type] = ++cnt; + else + to = tok - (tok != sumbuf); + tok = NULL; + } + if (!*from++) + break; + continue; + } + if (!tok) { + if (to != sumbuf) + *to++ = ' '; + tok = to; + } + if (to - sumbuf >= sumbuf_len - 1) { + to = tok - (tok != sumbuf); + break; + } + *to++ = *from++; + } + *to = '\0'; + + return to - sumbuf; } -void negotiate_checksum(int f_in, int f_out, const char *csum_list) +void negotiate_checksum(int f_in, int f_out, const char *csum_list, int saw_fail) { - char *tok, sumbuf[MAX_CHECKSUM_LIST]; + char *tok, sumbuf[MAX_CHECKSUM_LIST], saw[CSUM_SAW_BUFLEN]; int sum_type, len; - if (!am_server || local_server) { - if (!csum_list || !*csum_list) - csum_list = default_checksum_list; - len = strlen(csum_list); - if (len >= (int)sizeof sumbuf) { - rprintf(FERROR, "The checksum list is too long.\n"); - exit_cleanup(RERR_UNSUPPORTED); + /* Simplify the user-provided string so that it contains valid + * checksum names without any duplicates. The client side also + * makes use of the saw values when scanning the server's list. */ + if (csum_list && *csum_list && (!am_server || local_server)) { + len = parse_checksum_list(csum_list, sumbuf, sizeof sumbuf, saw); + if (saw_fail && !len) + len = strlcpy(sumbuf, "FAIL", sizeof sumbuf); + csum_list = sumbuf; + } else + csum_list = NULL; + + if (!csum_list || !*csum_list) { + struct csum_struct *cs; + for (tok = sumbuf, cs = valid_checksums, len = 0; cs->name; cs++) { + if (cs->num == CSUM_NONE) + continue; + if (tok != sumbuf) + *tok++ = ' '; + tok += strlcpy(tok, cs->name, sizeof sumbuf - (tok - sumbuf)); + saw[cs->num] = ++len; } - if (!local_server) - write_vstring(f_out, csum_list, len); + *tok = '\0'; + len = tok - sumbuf; } - if (local_server && !read_batch) - memcpy(sumbuf, csum_list, len+1); - else + /* Each side sends their list of valid checksum names to the other side and + * then both sides pick the first name in the client's list that is also in + * the server's list. */ + if (!local_server) + write_vstring(f_out, sumbuf, len); + + if (!local_server || read_batch) len = read_vstring(f_in, sumbuf, sizeof sumbuf); if (len > 0) { + int best = CSUM_SAW_BUFLEN; /* We want best == 1 from the client list */ + if (am_server) + memset(saw, 1, CSUM_SAW_BUFLEN); /* The first client's choice is the best choice */ for (tok = strtok(sumbuf, " \t"); tok; tok = strtok(NULL, " \t")) { - len = strlen(tok); - sum_type = parse_csum_name(tok, len, 0); - if (sum_type >= CSUM_NONE) { - xfersum_type = checksum_type = sum_type; - if (am_server && !local_server) - write_vstring(f_out, tok, len); - negotiated_csum_name = strdup(tok); - return; - } + sum_type = parse_csum_name(tok, -1, 0); + if (sum_type < 0 || !saw[sum_type] || best < saw[sum_type]) + continue; + xfersum_type = checksum_type = sum_type; + negotiated_csum_name = tok; + best = saw[sum_type]; + if (best == 1) + break; + } + if (negotiated_csum_name) { + negotiated_csum_name = strdup(negotiated_csum_name); + return; } } + if (!am_server) + msleep(20); rprintf(FERROR, "Failed to negotiate a common checksum\n"); exit_cleanup(RERR_UNSUPPORTED); } diff --git a/compat.c b/compat.c index b29b9637..8c77ea69 100644 --- a/compat.c +++ b/compat.c @@ -367,24 +367,25 @@ void setup_protocol(int f_out,int f_in) } #endif - if (am_server) { - if (!checksum_seed) - checksum_seed = time(NULL) ^ (getpid() << 6); - write_int(f_out, checksum_seed); - } else { - checksum_seed = read_int(f_in); - } - if (!checksum_choice) { const char *rcl = getenv("RSYNC_CHECKSUM_LIST"); + int saw_fail = rcl && strstr(rcl, "FAIL"); if (csum_exchange) - negotiate_checksum(f_in, f_out, rcl); - else if (!am_server && rcl && *rcl && strstr(rcl, "FAIL")) { + negotiate_checksum(f_in, f_out, rcl, saw_fail); + else if (!am_server && saw_fail) { rprintf(FERROR, "Remote rsync is too old for checksum negotation\n"); exit_cleanup(RERR_UNSUPPORTED); } } + if (am_server) { + if (!checksum_seed) + checksum_seed = time(NULL) ^ (getpid() << 6); + write_int(f_out, checksum_seed); + } else { + checksum_seed = read_int(f_in); + } + init_flist(); } diff --git a/flist.c b/flist.c index 1f2b278d..940071fd 100644 --- a/flist.c +++ b/flist.c @@ -142,7 +142,7 @@ void init_flist(void) rprintf(FINFO, "FILE_STRUCT_LEN=%d, EXTRA_LEN=%d\n", (int)FILE_STRUCT_LEN, (int)EXTRA_LEN); } - parse_checksum_choice(); /* Sets checksum_type && xfersum_type */ + parse_checksum_choice(1); /* Sets checksum_type && xfersum_type */ flist_csum_len = csum_len_for_type(checksum_type, 1); show_filelist_progress = INFO_GTE(FLIST, 1) && xfer_dirs && !am_server && !inc_recurse; diff --git a/io.c b/io.c index 446a5f34..189bc232 100644 --- a/io.c +++ b/io.c @@ -2369,8 +2369,8 @@ void start_write_batch(int fd) write_int(batch_fd, protocol_version); if (protocol_version >= 30) write_varint(batch_fd, compat_flags); - write_int(batch_fd, checksum_seed); maybe_write_checksum(batch_fd); + write_int(batch_fd, checksum_seed); if (am_sender) write_batch_monitor_out = fd; diff --git a/options.c b/options.c index e2adcf83..6926f96c 100644 --- a/options.c +++ b/options.c @@ -276,6 +276,7 @@ static struct output_struct debug_words[COUNT_DEBUG+1] = { DEBUG_WORD(CHDIR, W_CLI|W_SRV, "Debug when the current directory changes"), DEBUG_WORD(CONNECT, W_CLI, "Debug connection events (levels 1-2)"), DEBUG_WORD(CMD, W_CLI, "Debug commands+options that are issued (levels 1-2)"), + DEBUG_WORD(CSUM, W_CLI|W_SRV, "Debug checksum negotiation"), DEBUG_WORD(DEL, W_REC, "Debug delete actions (levels 1-3)"), DEBUG_WORD(DELTASUM, W_SND|W_REC, "Debug delta-transfer checksumming (levels 1-4)"), DEBUG_WORD(DUP, W_REC, "Debug weeding of duplicate names"), @@ -1932,7 +1933,7 @@ int parse_arguments(int *argc_p, const char ***argv_p) /* Call this early to verify the args and figure out if we need to force * --whole-file. Note that the parse function will get called again later, * just in case an "auto" choice needs to know the protocol_version. */ - parse_checksum_choice(); + parse_checksum_choice(0); } else checksum_choice = NULL; diff --git a/rsync.h b/rsync.h index 0eb4fbc0..d6c5de52 100644 --- a/rsync.h +++ b/rsync.h @@ -1317,7 +1317,8 @@ extern short info_levels[], debug_levels[]; #define DEBUG_CHDIR (DEBUG_BIND+1) #define DEBUG_CONNECT (DEBUG_CHDIR+1) #define DEBUG_CMD (DEBUG_CONNECT+1) -#define DEBUG_DEL (DEBUG_CMD+1) +#define DEBUG_CSUM (DEBUG_CMD+1) +#define DEBUG_DEL (DEBUG_CSUM+1) #define DEBUG_DELTASUM (DEBUG_DEL+1) #define DEBUG_DUP (DEBUG_DELTASUM+1) #define DEBUG_EXIT (DEBUG_DUP+1) -- 2.47.2