From: Michal Rakowski Date: Wed, 28 Apr 2021 11:38:12 +0000 (+0200) Subject: Introduce 'o' option for accurate backup X-Git-Tag: Release-11.3.2~461 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3de0ec782477ac7aa43a4eb59849e527e83969f6;p=thirdparty%2Fbacula.git Introduce 'o' option for accurate backup This feature can be used -only- if all of the daemons (DIR, SD and FD) are updated to this version. When the new 'o' accurate option is used, only the metadata is backed up when a file's contents did not change (meaning that the file's checksum stays the same). A new stream was added to handle that during the backup and restore phases. --- diff --git a/bacula/src/cats/sql_create.c b/bacula/src/cats/sql_create.c index 6fdea955d..be70433cc 100644 --- a/bacula/src/cats/sql_create.c +++ b/bacula/src/cats/sql_create.c @@ -1037,6 +1037,7 @@ bool BDB::bdb_create_attributes_record(JCR *jcr, ATTR_DBR *ar) * Make sure we have an acceptable attributes record. */ if (!(ar->Stream == STREAM_UNIX_ATTRIBUTES || + ar->Stream == STREAM_UNIX_ATTRIBUTE_UPDATE || ar->Stream == STREAM_UNIX_ATTRIBUTES_EX)) { Mmsg1(&errmsg, _("Attempt to put non-attributes into catalog. Stream=%d\n"), ar->Stream); diff --git a/bacula/src/dird/catreq.c b/bacula/src/dird/catreq.c index ff0f50cc8..cca55c792 100644 --- a/bacula/src/dird/catreq.c +++ b/bacula/src/dird/catreq.c @@ -581,7 +581,9 @@ static void update_attribute(JCR *jcr, char *msg, int32_t msglen) Dmsg5(400, "UpdCat VolSessId=%d VolSessT=%d FI=%d Strm=%d reclen=%d\n", VolSessionId, VolSessionTime, FileIndex, Stream, reclen); - if (Stream == STREAM_UNIX_ATTRIBUTES || Stream == STREAM_UNIX_ATTRIBUTES_EX) { + if (Stream == STREAM_UNIX_ATTRIBUTES || + Stream == STREAM_UNIX_ATTRIBUTES_EX || + Stream == STREAM_UNIX_ATTRIBUTE_UPDATE) { if (jcr->cached_attribute) { Dmsg2(400, "Cached attr. 
Stream=%d fname=%s\n", ar->Stream, ar->fname); if (!db_create_attributes_record(jcr, jcr->db, ar)) { diff --git a/bacula/src/dird/inc_conf.c b/bacula/src/dird/inc_conf.c index f99203645..f4e08108c 100644 --- a/bacula/src/dird/inc_conf.c +++ b/bacula/src/dird/inc_conf.c @@ -277,7 +277,7 @@ static void scan_include_options(LEX *lc, int keyword, char *opts, int optlen) break; case INC_KW_BASEJOB: case INC_KW_ACCURATE: - fs_options = "ipnugsamMcdA5123:JC"; /* From filed/accurate.c accurate_check_file() */ + fs_options = "oipnugsamMcdA5123:JC"; /* From filed/accurate.c accurate_check_file() */ break; default: break; diff --git a/bacula/src/filed/accurate.c b/bacula/src/filed/accurate.c index 0f07de7d1..7dd7e4cea 100644 --- a/bacula/src/filed/accurate.c +++ b/bacula/src/filed/accurate.c @@ -319,6 +319,108 @@ bail_out: } +/* Helper method to calculate actual file's checksum and compare it with the old one. + * Return statuses: + * 0 - checksum did not change + * 1 - checksum changed + * -1 - error + */ +static int check_checksum_diff(JCR *jcr, FF_PKT *ff_pkt, CurFile *elt) +{ + int ret = 0; + int digest_stream = STREAM_NONE; + DIGEST *digest = NULL; + char *fname; + + if (S_ISDIR(ff_pkt->statp.st_mode)) { + fname = ff_pkt->link; + } else { + fname = ff_pkt->fname; + } + + /* + * The remainder of the function is all about getting the checksum. + * First we initialise, then we read files, other streams and Finder Info. + */ + if (ff_pkt->type != FT_LNKSAVED && + (S_ISREG(ff_pkt->statp.st_mode) && + ff_pkt->flags & (FO_MD5|FO_SHA1|FO_SHA256|FO_SHA512))) + { + + if (!*elt->chksum && !jcr->rerunning) { + Jmsg(jcr, M_WARNING, 0, _("Cannot verify checksum for %s\n"), + ff_pkt->fname); + ret = -1; + goto bail_out; + } + + /* + * Create our digest context. If this fails, the digest will be set + * to NULL and not used. 
+ */ + if (ff_pkt->flags & FO_MD5) { + digest = crypto_digest_new(jcr, CRYPTO_DIGEST_MD5); /* TODO: With FIPS, MD5 is disabled */ + digest_stream = STREAM_MD5_DIGEST; + + } else if (ff_pkt->flags & FO_SHA1) { + digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA1); + digest_stream = STREAM_SHA1_DIGEST; + + } else if (ff_pkt->flags & FO_SHA256) { + digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA256); + digest_stream = STREAM_SHA256_DIGEST; + + } else if (ff_pkt->flags & FO_SHA512) { + digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA512); + digest_stream = STREAM_SHA512_DIGEST; + } + + /* Did digest initialization fail? */ + if (digest_stream != STREAM_NONE && digest == NULL) { + Jmsg(jcr, M_WARNING, 0, _("%s digest initialization failed\n"), + stream_to_ascii(digest_stream)); + } + + /* compute MD5 or SHA1 hash */ + if (digest) { + char md[CRYPTO_DIGEST_MAX_SIZE]; + uint32_t size; + + size = sizeof(md); + + if (digest_file(jcr, ff_pkt, digest) != 0) { + jcr->JobErrors++; + + } else if (crypto_digest_finalize(digest, (uint8_t *)md, &size)) { + char *digest_buf; + const char *digest_name; + + digest_buf = (char *)malloc(BASE64_SIZE(size)); + digest_name = crypto_digest_name(digest); + + bin_to_base64(digest_buf, BASE64_SIZE(size), md, size, true); + + if (strcmp(digest_buf, elt->chksum)) { + Dmsg4(dbglvl,"%s %s chksum diff. Cat: %s File: %s\n", + fname, + digest_name, + elt->chksum, + digest_buf); + ret = 1; + } + free(digest_buf); + } + } + } + +bail_out: + if (digest) { + crypto_digest_free(digest); + } + + return ret; +} + /* * This function is called for each file seen in fileset. 
* We check in file_list hash if fname have been backuped @@ -330,15 +432,14 @@ bail_out: */ bool accurate_check_file(JCR *jcr, FF_PKT *ff_pkt) { - int digest_stream = STREAM_NONE; - DIGEST *digest = NULL; - struct stat statc; int32_t LinkFIc; bool stat = false; char *opts; char *fname; + bool only_changed = false, checksum = false; CurFile elt; + int ret; ff_pkt->delta_seq = 0; ff_pkt->accurate_found = false; @@ -481,96 +582,69 @@ bool accurate_check_file(JCR *jcr, FF_PKT *ff_pkt) stat = true; break; /* TODO: cleanup and factorise this function with verify.c */ + case ':': + case 'J': + case 'C': + default: + break; + } + } + + /* Go through opts once again, this time check only for checksum-related opts */ + for (char *p=opts; *p; p++) { + switch (*p) { case '5': /* compare MD5 */ case '1': /* compare SHA1 */ case '2': /* compare SHA256 */ case '3': /* compare SHA512 */ - /* - * The remainder of the function is all about getting the checksum. - * First we initialise, then we read files, other streams and Finder Info. - */ - if (!stat && ff_pkt->type != FT_LNKSAVED && - (S_ISREG(ff_pkt->statp.st_mode) && - ff_pkt->flags & (FO_MD5|FO_SHA1|FO_SHA256|FO_SHA512))) - { - - if (!*elt.chksum && !jcr->rerunning) { - Jmsg(jcr, M_WARNING, 0, _("Cannot verify checksum for %s\n"), - ff_pkt->fname); - stat = true; - break; - } - - /* - * Create our digest context. If this fails, the digest will be set - * to NULL and not used. 
- */ - if (ff_pkt->flags & FO_MD5) { - digest = crypto_digest_new(jcr, CRYPTO_DIGEST_MD5); /* TODO: With FIPS, MD5 is disabled */ - digest_stream = STREAM_MD5_DIGEST; - - } else if (ff_pkt->flags & FO_SHA1) { - digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA1); - digest_stream = STREAM_SHA1_DIGEST; - - } else if (ff_pkt->flags & FO_SHA256) { - digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA256); - digest_stream = STREAM_SHA256_DIGEST; - - } else if (ff_pkt->flags & FO_SHA512) { - digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA512); - digest_stream = STREAM_SHA512_DIGEST; - } - - /* Did digest initialization fail? */ - if (digest_stream != STREAM_NONE && digest == NULL) { - Jmsg(jcr, M_WARNING, 0, _("%s digest initialization failed\n"), - stream_to_ascii(digest_stream)); - } - - /* compute MD5 or SHA1 hash */ - if (digest) { - char md[CRYPTO_DIGEST_MAX_SIZE]; - uint32_t size; - - size = sizeof(md); - - if (digest_file(jcr, ff_pkt, digest) != 0) { - jcr->JobErrors++; - - } else if (crypto_digest_finalize(digest, (uint8_t *)md, &size)) { - char *digest_buf; - const char *digest_name; - - digest_buf = (char *)malloc(BASE64_SIZE(size)); - digest_name = crypto_digest_name(digest); - - bin_to_base64(digest_buf, BASE64_SIZE(size), md, size, true); - - if (strcmp(digest_buf, elt.chksum)) { - Dmsg4(dbglvl,"%s %s chksum diff. 
Cat: %s File: %s\n", - fname, - digest_name, - elt.chksum, - digest_buf); - stat = true; - } - - free(digest_buf); - } - crypto_digest_free(digest); - } + if (ff_pkt->type != FT_LNKSAVED && + (S_ISREG(ff_pkt->statp.st_mode) && + ff_pkt->flags & (FO_MD5|FO_SHA1|FO_SHA256|FO_SHA512))) { + checksum = true; } - break; - case ':': - case 'J': - case 'C': + case 'o': + only_changed = true; + break; default: break; } } + /* Check if user specified any of the checksum accurate opts */ + if (checksum) { + if (only_changed) { + /* User wants to calculate checksum only for the files with changed metadata */ + if (stat) { + /* At least one metadata member specified in the accurate options has changed, so we need to calculate + * and compare the file's checksum and decide whether to back up the file or only its metadata, based on + * the checksum comparison with the 'old' file */ + ret = check_checksum_diff(jcr, ff_pkt, &elt); + if (ret == 1) { + // checksum has changed, back up the file normally + stat = true; + } else if (ret == -1){ + stat = false; + goto bail_out; + } else { + /* Checksum hasn't changed, we can back up only the metadata */ + ff_pkt->stat_update = true; + } + } + } else if (!only_changed && !stat) { + /* User did not specify the 'calculate checksum only when metadata changes' option, + * and we know at this point that the specified metadata did not change, so we need to calculate the checksum + * and base our backup decision on the result of comparing it with the one we had before */ + ret = check_checksum_diff(jcr, ff_pkt, &elt); + if (ret == 1) { + stat = true; + } else if (ret == -1){ + stat = false; + goto bail_out; + } + } + } + /* In Incr/Diff accurate mode, we mark all files as seen * When in Full+Base mode, we mark only if the file match exactly */ diff --git a/bacula/src/filed/backup.c b/bacula/src/filed/backup.c index f558e27df..22b138dec 100644 --- a/bacula/src/filed/backup.c +++ b/bacula/src/filed/backup.c @@ -523,14 +523,12 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level) goto bail_out; } 
- /** Meta data only for restore object */ - if (IS_FT_OBJECT(ff_pkt->type)) { - goto good_rtn; - } - /** Meta data only for deleted files */ - if (ff_pkt->type == FT_DELETED) { + if (IS_FT_OBJECT(ff_pkt->type) || /* Meta data only for restore object */ + ff_pkt->type == FT_DELETED || /* Meta data only for deleted files */ + bctx.ff_pkt->stat_update) { /* Only metadata changed for file */ goto good_rtn; } + /** Set up the encryption context and send the session data to the SD */ if (has_file_data && jcr->crypto.pki_encrypt) { if (!crypto_session_send(jcr, sd)) { @@ -1047,6 +1045,8 @@ bool encode_and_send_attributes(bctx_t &bctx) attr_stream = STREAM_RESTORE_OBJECT; } else if (ff_pkt->type == FT_PLUGIN_OBJECT) { attr_stream = STREAM_PLUGIN_OBJECT; + } else if (ff_pkt->stat_update) { + attr_stream = STREAM_UNIX_ATTRIBUTE_UPDATE; } else { attribsEx = attribsExBuf; attr_stream = encode_attribsEx(jcr, attribsEx, ff_pkt); @@ -1201,7 +1201,9 @@ bool encode_and_send_attributes(bctx_t &bctx) case FT_REG: stat = sd->fsend("%ld %d %s%c%s%c%c%s%c%d%c", jcr->JobFiles, ff_pkt->type, ff_pkt->fname, 0, attribs, 0, 0, attribsEx, 0, - ff_pkt->delta_seq, 0); + /* TODO: we may want to increment the delta_seq number instead of hardcoding it to 1, + * if at some point we start to generate delta sequences for regular files too */ + ff_pkt->stat_update ? 
1 : ff_pkt->delta_seq, 0); break; default: stat = sd->fsend("%ld %d %s%c%s%c%c%s%c%u%c", jcr->JobFiles, diff --git a/bacula/src/filed/restore.c b/bacula/src/filed/restore.c index dff3956bc..26d40f2da 100644 --- a/bacula/src/filed/restore.c +++ b/bacula/src/filed/restore.c @@ -525,6 +525,7 @@ void do_restore(JCR *jcr) switch (rctx.stream) { case STREAM_UNIX_ATTRIBUTES: case STREAM_UNIX_ATTRIBUTES_EX: + case STREAM_UNIX_ATTRIBUTE_UPDATE: /* if any previous stream open, close it */ if (!close_previous_stream(rctx)) { goto get_out; diff --git a/bacula/src/findlib/create_file.c b/bacula/src/findlib/create_file.c index 088834d10..a3aef68a9 100644 --- a/bacula/src/findlib/create_file.c +++ b/bacula/src/findlib/create_file.c @@ -160,7 +160,8 @@ int create_file(JCR *jcr, ATTR *attr, BFILE *bfd, int replace) * we may blow away a FIFO that is being used to read the * restore data, or we may blow away a partition definition. */ - if (exists && attr->type != FT_RAW && attr->type != FT_FIFO) { + if (exists && attr->type != FT_RAW && attr->type != FT_FIFO + && attr->stream != STREAM_UNIX_ATTRIBUTE_UPDATE) { /* Get rid of old copy */ Dmsg1(400, "unlink %s\n", attr->ofname); if (unlink(attr->ofname) == -1) { @@ -212,6 +213,7 @@ int create_file(JCR *jcr, ATTR *attr, BFILE *bfd, int replace) case FT_REG: Dmsg1(100, "Create=%s\n", attr->ofname); flags = O_WRONLY | O_CREAT | O_BINARY | O_EXCL; + if (IS_CTG(attr->statp.st_mode)) { flags |= O_CTG; /* set contiguous bit if needed */ } @@ -221,6 +223,11 @@ int create_file(JCR *jcr, ATTR *attr, BFILE *bfd, int replace) } set_fattrs(bfd, &attr->statp); + if (attr->stream == STREAM_UNIX_ATTRIBUTE_UPDATE) { + /* File is already created and has valid contents; we only want to update its metadata */ + return CF_CREATED; + } + if ((bopen(bfd, attr->ofname, flags, S_IRUSR | S_IWUSR)) < 0) { berrno be; be.set_errno(bfd->berrno); diff --git a/bacula/src/findlib/find.h b/bacula/src/findlib/find.h index 19424541a..05099a8f6 100644 --- 
a/bacula/src/findlib/find.h +++ b/bacula/src/findlib/find.h @@ -168,6 +168,7 @@ struct FF_PKT { POOLMEM *ignoredir_fname; /* used to ignore directories */ char *digest; /* set to file digest when the file is a hardlink */ struct stat statp; /* stat packet */ + bool stat_update; /* Only the file's metadata needs to be updated */ uint32_t digest_len; /* set to the digest len when the file is a hardlink*/ int32_t digest_stream; /* set to digest type when the file is hardlink */ int32_t FileIndex; /* FileIndex of this file */ diff --git a/bacula/src/findlib/find_one.c b/bacula/src/findlib/find_one.c index bd70f72d4..3fd25d3d4 100644 --- a/bacula/src/findlib/find_one.c +++ b/bacula/src/findlib/find_one.c @@ -537,6 +537,10 @@ find_one_file(JCR *jcr, FF_PKT *ff_pkt, if (ff_pkt->null_output_device || (sizeleft == 0 && MODE_RALL == (MODE_RALL & ff_pkt->statp.st_mode))) { ff_pkt->type = FT_REGE; + if (ff_pkt->stat_update) { + /* No need to do a metadata-only update for empty files; perform the usual backup */ + ff_pkt->stat_update = 0; + } } else { ff_pkt->type = FT_REG; } diff --git a/bacula/src/stored/append.c b/bacula/src/stored/append.c index 60adfe924..02101262a 100644 --- a/bacula/src/stored/append.c +++ b/bacula/src/stored/append.c @@ -445,6 +445,7 @@ bool send_attrs_to_dir(JCR *jcr, DEV_RECORD *rec) rec->maskedStream == STREAM_RESTORE_OBJECT || rec->maskedStream == STREAM_PLUGIN_OBJECT || rec->maskedStream == STREAM_PLUGIN_META_CATALOG || + rec->maskedStream == STREAM_UNIX_ATTRIBUTE_UPDATE || crypto_digest_stream_type(rec->maskedStream) != CRYPTO_DIGEST_NONE) { if (!jcr->no_attributes) { BSOCK *dir = jcr->dir_bsock; diff --git a/bacula/src/stored/bextract.c b/bacula/src/stored/bextract.c index 44a74ffbd..03a84051e 100644 --- a/bacula/src/stored/bextract.c +++ b/bacula/src/stored/bextract.c @@ -410,6 +410,7 @@ static bool record_cb(DCR *dcr, DEV_RECORD *rec) switch (rec->maskedStream) { case STREAM_UNIX_ATTRIBUTES: case STREAM_UNIX_ATTRIBUTES_EX: + case STREAM_UNIX_ATTRIBUTE_UPDATE: 
/* If extracting, it was from previous stream, so * close the output file. diff --git a/bacula/src/stored/bscan.c b/bacula/src/stored/bscan.c index dffa2cc4a..7b16a53f0 100644 --- a/bacula/src/stored/bscan.c +++ b/bacula/src/stored/bscan.c @@ -736,6 +736,7 @@ static bool record_cb(DCR *dcr, DEV_RECORD *rec) switch (rec->maskedStream) { case STREAM_UNIX_ATTRIBUTES: case STREAM_UNIX_ATTRIBUTES_EX: + case STREAM_UNIX_ATTRIBUTE_UPDATE: if (!unpack_attributes_record(bjcr, rec->Stream, rec->data, rec->data_len, attr)) { Emsg0(M_ERROR_TERM, 0, _("Cannot continue.\n")); } diff --git a/bacula/src/streams.h b/bacula/src/streams.h index b85a7bda9..47da72059 100644 --- a/bacula/src/streams.h +++ b/bacula/src/streams.h @@ -104,6 +104,7 @@ #define STREAM_PLUGIN_OBJECT 34 /* Plugin object */ #define STREAM_PLUGIN_META_BLOB 35 /* Plugin metadata (blob) for file being backed up */ #define STREAM_PLUGIN_META_CATALOG 36 /* Plugin metadata (to be stored in catalog) for file being backed up */ +#define STREAM_UNIX_ATTRIBUTE_UPDATE 37 /* File's updated metadata */ #define STREAM_ADATA_BLOCK_HEADER 200 /* Adata block header */ #define STREAM_ADATA_RECORD_HEADER 201 /* Adata record header */