From 8bb66f06bc71d45c49a71db22bb5e76f7b2aa171 Mon Sep 17 00:00:00 2001 From: Eric Bollengier Date: Fri, 11 Nov 2022 10:46:00 +0100 Subject: [PATCH] Enhance the network error reporting between the Director and the File/SD --- bacula/src/dird/backup.c | 29 ++++++++++++++++++++++------- bacula/src/dird/fd_cmds.c | 1 - bacula/src/dird/getmsg.c | 4 ++-- bacula/src/dird/job.c | 3 ++- bacula/src/dird/mac.c | 6 ++++-- bacula/src/dird/msgchan.c | 5 +++-- bacula/src/dird/restore.c | 1 + bacula/src/dird/ua_cmds.c | 7 ++++--- bacula/src/dird/ua_dotcmds.c | 2 +- bacula/src/dird/ua_status.c | 7 +++---- bacula/src/dird/ua_update.c | 9 +++++---- bacula/src/dird/vbackup.c | 1 + bacula/src/dird/verify.c | 1 + 13 files changed, 49 insertions(+), 27 deletions(-) diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c index 0590f2b52..2c53973ca 100644 --- a/bacula/src/dird/backup.c +++ b/bacula/src/dird/backup.c @@ -466,7 +466,7 @@ bool do_backup(JCR *jcr) char ed1[100]; db_int64_ctx job, first, last; int64_t val=0; - POOL_MEM buf, tmp; + POOL_MEM buf, tmp, lasterr; if (jcr->is_JobLevel(L_VIRTUAL_FULL)) { return do_vbackup(jcr); @@ -564,6 +564,13 @@ bool do_backup(JCR *jcr) } foreach_alist(store, jcr->store_mngr->get_wstore_list()) { + /* The idea is to display the error at the start of the loop + * doing so, the last error will be displayed as fatal if needed + */ + if (strcmp(lasterr.c_str(), "") != 0) { + Jmsg(jcr, M_WARNING, 0, "%s", lasterr.c_str()); + pm_strcpy(lasterr, ""); + } jcr->store_mngr->set_current_wstorage(store); if (jcr->store_bsock) { @@ -575,8 +582,13 @@ bool do_backup(JCR *jcr) * Start conversation with Storage daemon */ if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) { - Dmsg1(100, "Failed connect to the storage: %s\n", jcr->store_mngr->get_wstore()->name()); + /* The message will be displayed as warning in the next + * iteration of the loop, or as fatal at the end of the + * foreach block + */ + pm_strcpy(lasterr, jcr->errmsg); 
continue; + } else { Dmsg1(100, "Connected to the storage: %s\n", jcr->store_mngr->get_wstore()->name()); } @@ -599,7 +611,8 @@ bool do_backup(JCR *jcr) * because we have only 2 iterations (so 'iter' variable is either 0 or 1) */ if (!start_storage_daemon_job(jcr, NULL, &wlist, wstore_group ? (bool)iter : true)) { - Dmsg1(100, "Failed to start job on the storage: %s\n", jcr->store_mngr->get_wstore()->name()); + Mmsg(lasterr, _("Failed to start job on the storage: %s\n"), + jcr->store_mngr->get_wstore()->name()); continue; } else { sd_job_started = true; @@ -626,7 +639,7 @@ bool do_backup(JCR *jcr) } if(!sd_job_started) { - Jmsg(jcr, M_FATAL, 0, _("Failed to start job on any of the storages defined!\n")); + Jmsg(jcr, M_FATAL, 0, "%s", lasterr.c_str()); goto bail_out; } @@ -654,7 +667,8 @@ bool do_backup(JCR *jcr) /* Print connection info only for real jobs */ build_connecting_info_log(_("Client"), jcr->client->name(), - get_client_address(jcr, jcr->client, tmp.addr()), jcr->client->FDport, + get_client_address(jcr, jcr->client, tmp.addr()), + jcr->client->FDport, fd->tls ? true : false, buf.addr()); Jmsg(jcr, M_INFO, 0, "%s", buf.c_str()); @@ -687,7 +701,7 @@ bool do_backup(JCR *jcr) if (jcr->sd_calls_client) { if (jcr->FDVersion < 10) { - Jmsg(jcr, M_FATAL, 0, _("The File daemon does not support SDCallsClient.\n")); + Jmsg(jcr, M_FATAL, 0, _("[DE0011] The File daemon does not support SDCallsClient.\n")); goto bail_out; } if (!send_client_addr_to_sd(jcr)) { @@ -882,8 +896,9 @@ int wait_for_job_termination(JCR *jcr, int timeout) jcr->CommCompressedBytes = CommCompressedBytes; jcr->Snapshot = VSS; jcr->Encrypt = Encrypt; + } else if (!jcr->is_canceled()) { - Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD. 
%c\n"), jcr->getJobStatus()); + Jmsg(jcr, M_FATAL, 0, _("[DE0011] No Job status returned from FD\n")); } /* Return the first error status we find Dir, FD, or SD */ diff --git a/bacula/src/dird/fd_cmds.c b/bacula/src/dird/fd_cmds.c index c2f150819..4d4d3524c 100644 --- a/bacula/src/dird/fd_cmds.c +++ b/bacula/src/dird/fd_cmds.c @@ -151,7 +151,6 @@ int connect_to_file_daemon(JCR *jcr, int retry_interval, int max_retry_time, jcr->setJobStatus(JS_Running); if (!authenticate_file_daemon(jcr, &status, &jcr->errmsg)) { - Dmsg1(10, "Authentication error with FD. %s\n", jcr->errmsg); return 0; } diff --git a/bacula/src/dird/getmsg.c b/bacula/src/dird/getmsg.c index d46a11e96..54d8a1e1b 100644 --- a/bacula/src/dird/getmsg.c +++ b/bacula/src/dird/getmsg.c @@ -416,12 +416,12 @@ bool response(JCR *jcr, BSOCK *bs, BSOCK_CLIENT_TYPE role, const char *resp, con return true; } if (prtmsg == DISPLAY_ERROR) { - Jmsg(jcr, M_FATAL, 0, _("Bad response to %s command: wanted %s, got %s\n"), + Jmsg(jcr, M_FATAL, 0, _("[DE0011] Bad response to %s command: wanted %s, got %s\n"), cmd, resp, bs->msg); } return false; } - Jmsg(jcr, M_FATAL, 0, _("Socket error on %s command: ERR=%s\n"), + Jmsg(jcr, M_FATAL, 0, _("[DE0018] Socket error on %s command: ERR=%s\n"), cmd, bs->bstrerror()); return false; } diff --git a/bacula/src/dird/job.c b/bacula/src/dird/job.c index ff674386a..b5b838b72 100644 --- a/bacula/src/dird/job.c +++ b/bacula/src/dird/job.c @@ -601,7 +601,7 @@ static bool cancel_sd_job(UAContext *ua, const char *cmd, JCR *jcr) } if (!connect_to_storage_daemon(ua->jcr, 10, SDConnectTimeout, 1)) { - ua->error_msg(_("Failed to connect to Storage daemon.\n")); + ua->error_msg("%s", ua->jcr->errmsg); return false; } @@ -891,6 +891,7 @@ void cancel_storage_daemon_job(JCR *jcr) } if (!connect_to_storage_daemon(ua->jcr, 10, SDConnectTimeout, 1)) { + Jmsg(ua->jcr, M_ERROR, 0, "%s", ua->jcr->errmsg); /* TODO: Enhance, it's not always a job */ goto bail_out; } Dmsg0(200, "Connected to storage 
daemon\n"); diff --git a/bacula/src/dird/mac.c b/bacula/src/dird/mac.c index cd17fe764..dcbaf7a47 100644 --- a/bacula/src/dird/mac.c +++ b/bacula/src/dird/mac.c @@ -524,7 +524,8 @@ bool do_mac(JCR *jcr) */ Dmsg0(200, "Connect to write (wjcr) storage daemon.\n"); if (!connect_to_storage_daemon(wjcr, 10, SDConnectTimeout, 1)) { - Jmsg(jcr, M_FATAL, 0, _("Could not connect to write Storage Daemon \"%s\"\n"), wjcr->store_mngr->get_wstore()->name()); + Jmsg(jcr, M_ERROR, 0, _("Could not connect to write Storage Daemon \"%s\"\n"), wjcr->store_mngr->get_wstore()->name()); + Jmsg(jcr, M_FATAL, 0, "%s", wjcr->errmsg); goto bail_out; } wsd = wjcr->store_bsock; @@ -539,7 +540,8 @@ bool do_mac(JCR *jcr) */ Dmsg1(200, "Connect to read (jcr) storage daemon. Jid=%d\n", jcr->JobId); if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) { - Jmsg(jcr, M_FATAL, 0, _("Could not connect to read Storage Daemon \"%s\"\n"), jcr->store_mngr->get_rstore()->name()); + Jmsg(jcr, M_ERROR, 0, _("Could not connect to read Storage Daemon \"%s\"\n"), jcr->store_mngr->get_rstore()->name()); + Jmsg(jcr, M_FATAL, 0, "%s", jcr->errmsg); goto bail_out; } sd = jcr->store_bsock; diff --git a/bacula/src/dird/msgchan.c b/bacula/src/dird/msgchan.c index 60603f362..4da9fec13 100644 --- a/bacula/src/dird/msgchan.c +++ b/bacula/src/dird/msgchan.c @@ -69,8 +69,9 @@ BSOCK *open_sd_bsock(UAContext *ua) if (!is_bsock_open(ua->jcr->store_bsock)) { ua->send_msg(_("Connecting to Storage daemon %s at %s:%d ...\n"), store->name(), store->address, store->SDport); - if (!connect_to_storage_daemon(ua->jcr, 10, SDConnectTimeout, 1)) { - ua->error_msg(_("Failed to connect to Storage daemon.\n")); + if (!connect_to_storage_daemon(ua->jcr, 5, SDConnectTimeout, 0 /* No need to be verbose */)) { + ua->error_msg("%s", ua->jcr->errmsg); + free_bsock(ua->jcr->store_bsock); return NULL; } } diff --git a/bacula/src/dird/restore.c b/bacula/src/dird/restore.c index f83f1dcca..396bd673c 100644 --- a/bacula/src/dird/restore.c 
+++ b/bacula/src/dird/restore.c @@ -336,6 +336,7 @@ bool restore_bootstrap(JCR *jcr) * Start conversation with Storage daemon */ if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) { + Jmsg(jcr, M_FATAL, 0, "%s", jcr->errmsg); goto bail_out; } sd = jcr->store_bsock; diff --git a/bacula/src/dird/ua_cmds.c b/bacula/src/dird/ua_cmds.c index 9d2775a26..74c27c7ec 100644 --- a/bacula/src/dird/ua_cmds.c +++ b/bacula/src/dird/ua_cmds.c @@ -1064,8 +1064,8 @@ static void do_storage_setdebug(UAContext *ua, STORE *store, ua->send_msg(_("Connecting to Storage daemon %s at %s:%d\n"), store->name(), store->address, store->SDport); if (!connect_to_storage_daemon(ua->jcr, 1, 15, 0)) { - ua->error_msg(_("Failed to connect to Storage daemon.\n")); - return; + ua->error_msg("%s", ua->jcr->errmsg); + goto bail_out; } Dmsg0(120, _("Connected to storage daemon\n")); sd = ua->jcr->store_bsock; @@ -1075,6 +1075,7 @@ static void do_storage_setdebug(UAContext *ua, STORE *store, ua->send_msg("%s", sd->msg); } sd->signal(BNET_TERMINATE); +bail_out: free_bsock(ua->jcr->store_bsock); return; } @@ -2519,7 +2520,7 @@ static void do_storage_cmd(UAContext *ua, const char *command) Dmsg4(120, "Cmd: %s %s drive=%d slot=%d\n", command, dev_name, drive, slot); if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) { - ua->error_msg(_("Failed to connect to Storage daemon.\n")); + ua->error_msg("%s", jcr->errmsg); goto bail_out; } diff --git a/bacula/src/dird/ua_dotcmds.c b/bacula/src/dird/ua_dotcmds.c index b21340f4d..5ee58e04c 100644 --- a/bacula/src/dird/ua_dotcmds.c +++ b/bacula/src/dird/ua_dotcmds.c @@ -1753,7 +1753,7 @@ static void do_storage_cmd(UAContext *ua, STORE *store, const char *cmd) ua->send_msg(_("Connecting to Storage daemon %s at %s:%d\n"), store->name(), store->address, store->SDport); if (!connect_to_storage_daemon(jcr, 1, 15, 0)) { - ua->error_msg(_("Failed to connect to Storage daemon.\n")); + ua->error_msg("%s", jcr->errmsg); goto bail_out; } Dmsg0(120, _("Connected 
to storage daemon\n")); diff --git a/bacula/src/dird/ua_status.c b/bacula/src/dird/ua_status.c index 526d0f425..41cb4012b 100644 --- a/bacula/src/dird/ua_status.c +++ b/bacula/src/dird/ua_status.c @@ -161,8 +161,8 @@ static int do_network_status(UAContext *ua) ustore.store->name(), ustore.store->address, ustore.store->SDport); } - if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) { - ua->error_msg(_("Failed to connect to Storage.\n")); + if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 0 /* not verbose */)) { + ua->error_msg("%s", jcr->errmsg); goto bail_out; } @@ -573,8 +573,7 @@ static void do_storage_status(UAContext *ua, STORE *store, char *cmd) if (!ua->api) ua->send_msg(_("Connecting to Storage daemon %s at %s:%d\n"), store->name(), store->address, store->SDport); if (!connect_to_storage_daemon(ua->jcr, 1, 15, 0)) { - ua->send_msg(_("\nFailed to connect to Storage daemon %s.\n====\n"), - store->name()); + ua->error_msg("%s", ua->jcr->errmsg); free_bsock(ua->jcr->store_bsock); return; } diff --git a/bacula/src/dird/ua_update.c b/bacula/src/dird/ua_update.c index 52c050507..08bea43f2 100644 --- a/bacula/src/dird/ua_update.c +++ b/bacula/src/dird/ua_update.c @@ -1203,7 +1203,7 @@ static int update_volumeprotect_cmd(UAContext *ua) media_protect_list_handler, &list); if (list.size() > 0) { - ua->send_msg(_("Found %d volumes with status Used/Full that must be protected\n"), list.size()); + ua->send_msg(_("Found %d volume(s) with status Used/Full that must be protected\n"), list.size()); } else { ua->send_msg(_("No volume found to protect\n")); @@ -1234,9 +1234,10 @@ static int update_volumeprotect_cmd(UAContext *ua) free_bsock(ua->jcr->store_bsock); sd = NULL; } - - if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) { - ua->error_msg(_("Failed to connect to Storage daemon.\n")); + ua->send_msg(_("Connecting to Storage %s at %s:%d\n"), + ustore.store->name(), ustore.store->address, ustore.store->SDport); + if 
(!connect_to_storage_daemon(jcr, 5, SDConnectTimeout, 0 /* not verbose */)) { + ua->error_msg("%s", jcr->errmsg); ret = 0; continue; } diff --git a/bacula/src/dird/vbackup.c b/bacula/src/dird/vbackup.c index d0bb49f51..cf2fb769c 100644 --- a/bacula/src/dird/vbackup.c +++ b/bacula/src/dird/vbackup.c @@ -342,6 +342,7 @@ _("This Job is not an Accurate backup so is not equivalent to a Full backup.\n") * Start conversation with Storage daemon */ if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) { + Jmsg(jcr, M_FATAL, 0, "%s", jcr->errmsg); return false; } sd = jcr->store_bsock; diff --git a/bacula/src/dird/verify.c b/bacula/src/dird/verify.c index 10fe85cd9..0800b3b92 100644 --- a/bacula/src/dird/verify.c +++ b/bacula/src/dird/verify.c @@ -274,6 +274,7 @@ bool do_verify(JCR *jcr) */ jcr->setJobStatus(JS_Blocked); if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) { + Jmsg(jcr, M_FATAL, 0, "%s", jcr->errmsg); return false; } /* -- 2.47.3