From: Eric Bollengier Date: Thu, 21 Apr 2022 15:51:09 +0000 (+0200) Subject: Add core functions to check malware after a backup X-Git-Tag: Beta-15.0.0~470 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4243048e09373d0829c53c4c83962d22cc949368;p=thirdparty%2Fbacula.git Add core functions to check malware after a backup --- diff --git a/bacula/src/dird/Makefile.in b/bacula/src/dird/Makefile.in index 38eb9408f..dac30cd7f 100644 --- a/bacula/src/dird/Makefile.in +++ b/bacula/src/dird/Makefile.in @@ -45,7 +45,7 @@ SVRSRCS = dird.c admin.c authenticate.c \ ua_query.c ua_collect.c \ ua_input.c ua_label.c ua_output.c ua_prune.c \ ua_purge.c ua_restore.c ua_run.c \ - ua_select.c ua_server.c snapshot.c \ + ua_select.c ua_server.c snapshot.c malware.c \ ua_status.c ua_tree.c ua_update.c vbackup.c verify.c $(EXTRA_SRCS) SVROBJS = $(SVRSRCS:.c=.o) diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c index fa66cb79e..d88bf0a4f 100644 --- a/bacula/src/dird/backup.c +++ b/bacula/src/dird/backup.c @@ -748,6 +748,17 @@ bool do_backup(JCR *jcr) /* Any error already printed */ } + /* Check for Malware */ + if (jcr->JobFiles > 0 && jcr->job->CheckMalware) { + Jmsg(jcr, M_INFO, 0, _("[DI0002] Checking file metadata for Malwares\n")); + edit_int64(jcr->JobId, ed1); + if (check_malware(jcr, ed1, buf.handle()) != 0) { + Jmsg(jcr, M_ERROR, 0, "%s", buf.c_str()); + } else { + Jmsg(jcr, M_INFO, 0, "%s", buf.c_str()); + } + } + if (!jcr->is_canceled() && stat == JS_Terminated) { backup_cleanup(jcr, stat); return true; diff --git a/bacula/src/dird/dird_conf.c b/bacula/src/dird/dird_conf.c index 39d093195..0b610a8a7 100644 --- a/bacula/src/dird/dird_conf.c +++ b/bacula/src/dird/dird_conf.c @@ -382,6 +382,7 @@ static RES_ITEM dir_items[] = { {"MaximumConcurrentJobs", store_pint32, ITEM(res_dir.MaxConcurrentJobs), 0, ITEM_DEFAULT, 20}, {"MaximumReloadRequests", store_pint32, ITEM(res_dir.MaxReload), 0, ITEM_DEFAULT, 32}, {"MaximumConsoleConnections", 
store_pint32, ITEM(res_dir.MaxConsoleConnect), 0, ITEM_DEFAULT, 20}, + {"MalwareDatabaseCommand", store_str, ITEM(res_dir.get_malwaredb_command), 0, 0, 0}, {"Password", store_password, ITEM(res_dir.password), 0, ITEM_REQUIRED, 0}, {"FdConnectTimeout", store_time,ITEM(res_dir.FDConnectTimeout), 0, ITEM_DEFAULT, 3 * 60}, {"SdConnectTimeout", store_time,ITEM(res_dir.SDConnectTimeout), 0, ITEM_DEFAULT, 30 * 60}, @@ -585,6 +586,7 @@ RES_ITEM job_items[] = { {"Type", store_jobtype, ITEM(res_job.JobType), 0, ITEM_REQUIRED, 0}, {"Level", store_level, ITEM(res_job.JobLevel), 0, 0, 0}, {"Messages", store_res, ITEM(res_job.messages), R_MSGS, ITEM_REQUIRED, 0}, + {"CheckMalware", store_bool, ITEM(res_job.CheckMalware), 0, 0, 0}, {"Storage", store_alist_res, ITEM(res_job.storage), R_STORAGE, 0, 0}, {"StorageGroupPolicy", store_storage_mngr, ITEM(res_job.storage_policy), 0, 0, 0}, {"Pool", store_res, ITEM(res_job.pool), R_POOL, ITEM_REQUIRED, 0}, @@ -1595,6 +1597,9 @@ void free_resource(RES *rres, int type) switch (type) { case R_DIRECTOR: + if (res->res_dir.get_malwaredb_command) { + free(res->res_dir.get_malwaredb_command); + } if (res->res_dir.working_directory) { free(res->res_dir.working_directory); } diff --git a/bacula/src/dird/dird_conf.h b/bacula/src/dird/dird_conf.h index d4a29d757..07f29d31a 100644 --- a/bacula/src/dird/dird_conf.h +++ b/bacula/src/dird/dird_conf.h @@ -130,6 +130,7 @@ public: bool tls_verify_peer; /* TLS Verify Client Certificate */ char *verid; /* Custom Id to print in version command */ char *customerid; /* Custom CustomerID */ + char *get_malwaredb_command; /* Custom Malware Command */ /* Methods */ char *name() const; }; @@ -519,6 +520,7 @@ public: bool CancelRunningDuplicates; /* Cancel Running jobs */ bool PurgeMigrateJob; /* Purges source job on completion */ bool DeleteConsolidatedJobs; /* Delete or not consolidated Virtual Full jobs */ + bool CheckMalware; /* Check for malware during the job */ alist *tag; /* tags defined for this Job */ 
alist *base; /* Base jobs */
diff --git a/bacula/src/dird/malware.c b/bacula/src/dird/malware.c
new file mode 100644
index 000000000..0a132bd82
--- /dev/null
+++ b/bacula/src/dird/malware.c
@@ -0,0 +1,391 @@
+/*
+   Bacula(R) - The Network Backup Solution
+
+   Copyright (C) 2000-2023 Kern Sibbald
+
+   The original author of Bacula is Kern Sibbald, with contributions
+   from many others, a complete list can be found in the file AUTHORS.
+
+   You may use this file and others of this release according to the
+   license defined in the LICENSE file, which includes the Affero General
+   Public License, v3.0 ("AGPLv3") and some additional permissions and
+   terms pursuant to its AGPLv3 Section 7.
+
+   This notice must be preserved when any source code is
+   conveyed and/or propagated.
+
+   Bacula(R) is a registered trademark of Kern Sibbald.
+*/
+
+/* Check for malware in the catalog
+ *    Written by Eric Bollengier Apr 2022
+ */
+
+#include "bacula.h"
+#include "dird.h"
+
+#define dbglvl 0
+
+/* Get hash type from the string length
+ *
+ *  len: length of a checksum string (a File.MD5 value or one line of the
+ *       malware hash list)
+ *
+ * Returns "MD5" or "SHA256" (the value is also used as the suffix of the
+ * Malware<type> table name), or NULL when the length matches no supported
+ * digest.
+ *
+ * NOTE(review): 22 is the length of a 16 byte MD5 digest in unpadded
+ * base64, the matching length for a 32 byte SHA256 digest is 43. The
+ * previous value (65) is not the length of a SHA256 digest in any encoding,
+ * so SHA256 checksums were never recognized. Confirm against the encoding
+ * used for File.MD5 in the catalog.
+ */
+static const char *hash_get_type(int len)
+{
+   switch(len) {
+   case 22:
+      return "MD5";
+   case 43:
+      return "SHA256";
+   default:
+      Dmsg1(0, "Unknown hash len %d\n", len);
+      return NULL;
+   }
+}
+
+/* Send the pending "('h1'),('h2'),..." value list to the Malware<type> table
+ * and reset the list.
+ *
+ * return false with errmsg set if the INSERT fails (the list is kept as is)
+ */
+static bool insert_malware_batch(BDB *db, const char *type, POOL_MEM &line, POOLMEM **errmsg)
+{
+   POOL_MEM cmd;
+
+   Mmsg(cmd, "INSERT INTO Malware%s (MD5) VALUES %s", type, line.c_str());
+   if (!db_sql_query(db, cmd.c_str(), NULL, NULL)) {
+      Mmsg(errmsg, "[DE0008] SQL Error %s\n", db->errmsg);
+      return false;
+   }
+   pm_strcpy(line, "");
+   return true;
+}
+
+/* We should run the load operation inside a BEGIN/COMMIT
+ *  mode: 0 Skip the load (already loaded)
+ *        1 Truncate the table and load the MD5
+ *        2 Load the MD5
+ *
+ *  source: Source of the information (not used by the load itself)
+ *  fname:  Name of the malware hash list. One checksum per line, lines
+ *          starting with '#' are comments. The hash type is detected from
+ *          the length of the first usable line.
+ *
+ * return false with errmsg set if any problem
+ */
+static bool load_malware_db(JCR *jcr, BDB *db, int mode, const char *source, const char *fname, POOLMEM **errmsg)
+{
+   POOL_MEM out, tmp, esc, line;
+   FILE *fp = NULL;
+   const char *type=NULL;
+   bool ret = false;
+   int64_t nb=0;
+
+   if (mode == 0) {             /* Already loaded, nothing to do */
+      return true;
+   }
+
+   /* TODO: We are going to update the malware database, we need to make sure
+    * only one job is doing that work at a time, the next one will just jump
+    * over it. Normally it's per catalog.
+    */
+   Dmsg1(dbglvl, "Load malware database from %s\n", fname);
+
+   /* We open the malware hash database */
+   fp = fopen(fname, "r");
+   if (!fp) {
+      berrno be;
+      Mmsg(errmsg, _("[DE0003] Unable to open the Malware Database export %s ERR=%s\n"),
+           fname, be.bstrerror());
+      return false;
+   }
+
+   /* We will now read the file to insert all records in the catalog.
+    * At some point, we might use the batch connection for this work (TODO)
+    */
+   db_lock(db);
+   while (bfgets(out.addr(), fp)) {
+      strip_trailing_junk(out.c_str());
+      if (out.c_str()[0] == '#') { // Skip comments
+         continue;
+      }
+
+      int len = strlen(out.c_str());
+      if (len == 0) {
+         /* Skip blank lines, else they are stored as an empty checksum
+          * once the hash type is known
+          */
+         continue;
+      }
+
+      /* Look if we can empty the checksum table before to insert data */
+      if (!type) {
+         type = hash_get_type(len);
+         if (!type) {
+            continue;
+         }
+         if (mode == 1) {
+            Dmsg0(dbglvl, "Truncate the current malware table\n");
+            /* On postgresql, a truncate inside a transaction will disable
+             * the WAL logging for the next command and speedup the insertion
+             */
+            Mmsg(tmp, sql_clear_malware_table[db_get_type_index(db)], type);
+            if (!db_sql_query(db, tmp.c_str(), NULL, NULL)) {
+               /* It is not the end of the world if the truncate doesn't work
+                * we will have just too many records, but I'm not expecting a
+                * failure here
+                */
+               Dmsg2(dbglvl, "Unable to execute %s %s\n", tmp.c_str(), db->errmsg);
+            }
+         }
+      }
+
+      /* Hash are usually fine, but the data is coming from outside */
+      esc.check_size(2*len+2);
+      db_escape_string(jcr, db, esc.c_str(), out.c_str(), len);
+
+      /* We batch the insertion of the checksum to limit the number of SQL queries */
+      Mmsg(tmp, "('%s')", esc.c_str());
+      if (line.c_str()[0]) {
+         pm_strcat(line, ",");
+      }
+      pm_strcat(line, tmp.c_str());
+
+      /* Count the record before testing the batch limit, else the very first
+       * record is sent alone and the trace below is printed for 0 record
+       */
+      nb++;
+      if ((nb % 5000) == 0) {
+         if ((nb % 100000) == 0) {
+            Dmsg0(dbglvl, "Sent 100000 records to the Malware table\n");
+         }
+         if (!insert_malware_batch(db, type, line, errmsg)) {
+            goto bail_out;
+         }
+      }
+   }
+
+   /* We might still have some checksum to store */
+   if (line.c_str()[0]) {
+      if (!insert_malware_batch(db, type, line, errmsg)) {
+         goto bail_out;
+      }
+   }
+   if (type) {
+      /* Best effort, a failure here is only traced */
+      Mmsg(tmp, "ANALYZE Malware%s", type);
+      if (!db_sql_query(db, tmp.c_str(), NULL, NULL)) {
+         Dmsg2(dbglvl, "Unable to execute %s %s\n", tmp.c_str(), db->errmsg);
+      }
+   }
+   Dmsg2(dbglvl, "Inserted %lld %s checksums\n", nb, NPRT(type));
+   ret = true;
+
+bail_out:
+   /* We get here only with the file opened and the catalog locked */
+   db_unlock(db);
+   fclose(fp);
+   return ret;
+}
+
+/* Update the malware catalog entries
+ * It is done in a transaction, so even running jobs checking for malware should
+ * be able to run in parallel
+ *
+ *  update_cmd: command to run (timeout 300), its output is stored in fname
+ *  fname:      on success, name of the checksum database file printed by the
+ *              command. Emptied on error.
+ *
+ * The exit code of the command is converted to the following return code:
+ *  -1 : Error while download the latest database (errmsg is set)
+ *   0 : Nothing to do                                    (exit code 1)
+ *   1 : Reset and load the file fname in the catalog     (exit code 0)
+ *   2 : Load the file fname in the catalog               (exit code 2)
+ */
+static int update_malware_db(JCR *jcr, char *update_cmd, POOLMEM **fname, POOLMEM **errmsg)
+{
+   Dmsg1(dbglvl, "Updating the malware database via %s\n", update_cmd);
+
+   /* The command will return the name of the checksum database file */
+   int ret = run_program(update_cmd, 300, *fname);
+   strip_trailing_junk(*fname);
+
+   if (ret == 0 || ret & b_errno_exit) {
+      /* Keep only the exit code of the command */
+      ret = ret & ~b_errno_exit;
+      switch (ret) {
+      case 0:
+         Dmsg0(dbglvl, "Malware database can be loaded\n");
+         return 1;
+      case 2:
+         Dmsg0(dbglvl, "Malware database can be updated\n");
+         return 2;
+      case 1:
+         Dmsg0(dbglvl, "Malware database is up to date\n");
+         return 0;
+      default:
+         break;                 /* Unexpected exit code, reported below */
+      }
+   }
+   pm_strcpy(fname, "");
+   Mmsg(errmsg, _("[DE0001] Unable to update the Malware Database ret=%d\n"), ret);
+   return -1;
+}
+
+#if 0
+bool ua_update_malware_db(UAContext *ua)
+{
+
+   return true;
+}
+
+bool list_malware(JCR *jcr,
const char *jobids)
+{
+   return true;
+}
+#endif
+
+extern const char *exepath;     // defined in lib/messages.c
+
+/* Commit the current transaction and release the catalog lock.
+ * The lock is released even if the COMMIT fails.
+ *
+ * return false if the COMMIT failed (info in db->errmsg)
+ */
+static bool commit_and_unlock(BDB *db)
+{
+   bool ok = db_sql_query(db, "COMMIT", NULL, NULL);
+   db_unlock(db);
+   return ok;
+}
+
+/* Check if a given set of jobids has a malware
+ *
+ *  jobids: comma separated list of JobIds, used as is in "JobId IN (...)"
+ *  errmsg: always set on return with the error or the result of the check
+ *
+ * The work is done in the following order:
+ *  - detect the hash type (MD5/SHA256) from one checksum of the jobs
+ *  - run the update command (Director MalwareDatabaseCommand or
+ *    get_malware_abuse.ch) and load the hash list in the catalog if needed
+ *  - record all File records with a known hash in FileEvents (Type 'M')
+ *
+ * Return code:
+ *  0 - Nothing found
+ * -1 - Error while processing the data (info in errmsg)
+ *  1 - Found a malware (info in errmsg)
+ */
+int check_malware(JCR *jcr, const char *jobids, POOLMEM **errmsg)
+{
+   POOL_MEM q, fname, source_esc;
+   const char *type = NULL;
+   const char *source = NULL;
+   const char *cmd = NULL;
+   alist lst(owned_by_alist, 1), *l;
+   l = &lst;
+   pm_strcpy(errmsg, "");
+
+#if 0  // Not working, job record probably not always up to date
+   uint32_t i=0;
+   /* We fetch the first checksum for the set of jobs to determine the hash type */
+   Mmsg(q, "SELECT SUM(JobFiles) FROM Job WHERE JobId IN (%s)", jobids);
+   if (!db_sql_query(jcr->db, q.c_str(), db_int_handler, &i)) {
+      Mmsg(errmsg, "[DE0008] SQL Error %s\n", jcr->db->errmsg);
+      return -1;
+   }
+   /* We need at least one checksum to determine the hash type */
+   if (i == 0) {
+      Mmsg(errmsg, "[DI0002] Nothing to check for JobIds %s [%s]\n", jobids, q.c_str());
+      Dmsg1(dbglvl, "%s", *errmsg);
+      return 0;
+   }
+#endif
+
+   /* We fetch the first checksum for the set of jobs to determine the hash type */
+   Mmsg(q, "SELECT MD5 FROM File "
+        "WHERE Filename <> '' AND MD5 <> '0' AND MD5 <> '' AND JobId IN (%s) LIMIT 1", jobids);
+
+   if (!db_sql_query(jcr->db, q.c_str(), db_string_list_handler, &l)) {
+      Mmsg(errmsg, "[DE0008] SQL Error %s\n", jcr->db->errmsg);
+      return -1;
+   }
+
+   /* We need at least one checksum to determine the hash type */
+   if (lst.size() != 1) {
+      Mmsg(errmsg, "[DE0005] Unable to find a checksum for JobIds %s. Use Signature = MD5/SHA256 FileSet option\n", jobids);
+      return -1;
+   }
+
+   type = hash_get_type(strlen((char *)lst[0]));
+   if (!type) {
+      Mmsg(errmsg, "[DE0006] Unable to detect the checksum type for JobIds %s\n", jobids);
+      return -1;
+   }
+
+   Dmsg1(dbglvl, "Found checksum type %s\n", type);
+
+   Mmsg(fname, "%s/malware-%s.dat", working_directory, type);
+   // TODO: We should take director and copy it locally, we can have problems with reload
+   cmd = director->get_malwaredb_command;
+   if (cmd) {
+      regex_t re;
+      regmatch_t pmatch[3];
+
+      /* The source is what follows "get_malware_" in the command. The group
+       * and the '+' need the extended syntax, with the basic one they are
+       * literal characters and the expression never matches a command.
+       */
+      source = "user specified";
+      if (regcomp(&re, ".*get_malware_(.+)$", REG_EXTENDED) != 0) {
+         /* re is not usable, don't call regexec()/regfree() on it */
+         Dmsg0(dbglvl, "Unable to compile regex\n");
+
+      } else {
+         if (regexec(&re, cmd, 3, pmatch, 0) == 0) {
+            source = cmd + pmatch[1].rm_so;
+         }
+         regfree(&re);
+      }
+
+      /* An absolute path is used as is, else the command is searched in the
+       * directory of the Director binary.
+       * NOTE(review): the test was [0] == ':', assumed to be a typo for the
+       * "C:..." drive letter form - confirm
+       */
+      if (cmd[0] == '/' || (cmd[0] != '\0' && cmd[1] == ':')) {
+         Mmsg(q, "%s %s %s", cmd, type, fname.c_str());
+
+      } else {
+         Mmsg(q, "%s/%s %s %s", exepath, cmd, type, fname.c_str());
+      }
+
+   } else {
+      source = "abuse.ch";
+      Mmsg(q, "%s/get_malware_%s %s %s", exepath, source, type, fname.c_str());
+   }
+
+   int ret = update_malware_db(jcr, q.c_str(), fname.handle(), errmsg);
+   if (ret < 0) {
+      return -1;
+   }
+
+   /* ret == 0 means that the catalog is already up to date: nothing to load
+    * and nothing to report in the events
+    */
+   if (ret > 0) {
+      db_lock(jcr->db);
+      if (!db_sql_query(jcr->db, "BEGIN", NULL, NULL)) {
+         Mmsg(errmsg, "[DE0008] SQL Error %s\n", jcr->db->errmsg);
+         db_unlock(jcr->db);
+         return -1;
+      }
+
+      /* TODO: Here we must load the malware database with a dedicated SQL connection
+       * and a global lock (no need to load the malware database multiple times in //
+       */
+      if (!load_malware_db(jcr, jcr->db, ret, source, fname.c_str(), errmsg)) {
+         /* errmsg already describes the load error, keep it */
+         commit_and_unlock(jcr->db);
+         events_send_msg(jcr,
+                         "DD0007",
+                         EVENTS_TYPE_SECURITY, "*Director*", (intptr_t)jcr,
+                         "Unable to update Malware hash database from %s. %s", source, *errmsg);
+         return -1;
+      }
+
+      // TODO: Check if we need to be inside the transaction or not
+      if (!commit_and_unlock(jcr->db)) {
+         Mmsg(errmsg, "[DE0008] SQL Error %s\n", jcr->db->errmsg);
+         return -1;
+      }
+
+      /* Keep track of important events */
+      events_send_msg(jcr,
+                      "DD0006",
+                      EVENTS_TYPE_SECURITY, "*Director*", (intptr_t)jcr,
+                      "Malware hash database updated from %s", source);
+   }
+
+   /* Add the source of information in the table */
+   int slen = strlen(source);
+   source_esc.check_size(2*slen+2);
+   db_escape_string(jcr, jcr->db, source_esc.c_str(), source, slen);
+
+   /* We keep track of the infected files in the FileEvents table */
+   Mmsg(q, "INSERT INTO FileEvents (SourceJobId, JobId, FileId, Type, Description, Severity, Source) "
+        "SELECT JobId, JobId, FileId, 'M', 'Malware found', 100, '%s' FROM File JOIN Malware%s USING (MD5) "
+        "WHERE JobId IN (%s)", source_esc.c_str(), type, jobids);
+
+   if (!db_sql_query(jcr->db, q.c_str(), NULL, NULL)) {
+      Mmsg(errmsg, "[DE0008] SQL Error %s\n", jcr->db->errmsg);
+      return -1;
+   }
+
+   /* One row is enough to know that something was found */
+   uint32_t found=0;
+   Mmsg(q, "SELECT 1 FROM FileEvents JOIN File USING (FileId) "
+        "WHERE FileEvents.JobId IN (%s) AND Type = 'M' LIMIT 1\n", jobids);
+
+   if (!db_sql_query(jcr->db, q.c_str(), db_int_handler, &found)) {
+      Mmsg(errmsg, "[DE0006] Unable to check malware for JobIds %s\n", jobids);
+      return -1;
+   }
+
+   if (found > 0) {
+      Mmsg(errmsg, _("[DE0007] Found Malware(s) on JobIds %s\n"), jobids);
+      return 1;
+   }
+   /* Leave a message */
+   Mmsg(errmsg, _("[DI0001] No known Malware reported by \"%s\"\n"), source);
+   return 0;
+}
+
+#if 0
+   char *f;
+   lst.destroy();
+   Mmsg(q, "SELECT Filename FROM SecurityEvents JOIN File USING (FileId) WHERE SecurityEvents.JobId IN (%s) LIMIT 1000\n", jobids);
+   if (!db_sql_query(jcr->db, q.c_str(), db_string_list_handler, &l)) {
+      Mmsg(errmsg, "[DE0006] Unable to check malware for JobIds %s\n", jobids);
+      return false;
+   }
+   if (lst.size() > 0) {
+
Jmsg(jcr, M_ERROR, 0, _("Found %s %d Malware(s) on\n"), (lst.size() == 1000) ? _("more than") : "", lst.size()); + foreach_alist(f, &lst) { + Jmsg(jcr, M_INFO, 0, " %s\n", f); + } + } +#endif diff --git a/bacula/src/dird/protos.h b/bacula/src/dird/protos.h index 4e334345b..468e5e4e9 100644 --- a/bacula/src/dird/protos.h +++ b/bacula/src/dird/protos.h @@ -397,3 +397,6 @@ bool is_dedup_ref(DEV_RECORD *rec, bool lazy); void * dir_authplugin_getauthenticationData(JCR *jcr, const char *console, const char *param); bRC dir_authplugin_do_interaction(JCR *jcr, BSOCK *bsock, const char *pluginname, void *data, bool pluginall = false); bRC dir_authplugin_authenticate(JCR *jcr, BSOCK *bsock, const char *pluginname); + +/* malware.c */ +int check_malware(JCR *jcr, const char *jobids, POOLMEM **errmsg); diff --git a/bacula/src/dird/ua_purge.c b/bacula/src/dird/ua_purge.c index 8970770cb..fb1a0fd2f 100644 --- a/bacula/src/dird/ua_purge.c +++ b/bacula/src/dird/ua_purge.c @@ -361,6 +361,10 @@ void purge_files_from_jobs(UAContext *ua, char *jobs) { POOL_MEM query(PM_MESSAGE); + Mmsg(query, "DELETE FROM FileEvents WHERE JobId IN (%s)", jobs); + db_sql_query(ua->db, query.c_str(), NULL, (void *)NULL); + Dmsg1(050, "Delete FileEvents sql=%s\n", query.c_str()); + Mmsg(query, "DELETE FROM MetaEmail WHERE JobId IN (%s)", jobs); db_sql_query(ua->db, query.c_str(), NULL, (void *)NULL); Dmsg1(050, "Delete MetaEmail sql=%s\n", query.c_str());