From: Eric Bollengier Date: Sun, 15 Jul 2018 06:32:18 +0000 (+0200) Subject: Fix concurrent acquire/release of device X-Git-Tag: Release-9.2.0~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f4861e1eddf45b7ee9246fa9940d753073ea2d0c;p=thirdparty%2Fbacula.git Fix concurrent acquire/release of device --- diff --git a/bacula/src/stored/acquire.c b/bacula/src/stored/acquire.c index e68a57cf8..cec9295d9 100644 --- a/bacula/src/stored/acquire.c +++ b/bacula/src/stored/acquire.c @@ -482,17 +482,16 @@ bool release_device(DCR *dcr) DEVICE *dev = dcr->dev; bool ok = true; char tbuf[100]; - int was_blocked; - + bsteal_lock_t holder; dev->Lock(); - was_blocked = BST_NOT_BLOCKED; - if (!dev->is_blocked()) { - block_device(dev, BST_RELEASING); - } else { - was_blocked = dev->blocked(); - dev->set_blocked(BST_RELEASING); + if (!obtain_device_block(dev, + &holder, + 0, /* infinite wait */ + BST_RELEASING)) { + ASSERT2(0, "unable to obtain device block"); } + lock_volumes(); Dmsg2(100, "release_device device %s is %s\n", dev->print_name(), dev->is_tape()?"tape":"disk"); @@ -578,15 +577,13 @@ bool release_device(DCR *dcr) (uint32_t)jcr->JobId, bstrftimes(tbuf, sizeof(tbuf), (utime_t)time(NULL))); pthread_cond_broadcast(&wait_device_release); - + give_back_device_block(dev, &holder); /* * If we are the thread that blocked the device, then unblock it */ if (pthread_equal(dev->no_wait_id, pthread_self())) { dev->dunblock(true); } else { - /* Otherwise, reset the prior block status and unlock */ - dev->set_blocked(was_blocked); dev->Unlock(); } diff --git a/bacula/src/stored/askdir.c b/bacula/src/stored/askdir.c index b03c5d927..c0a7d906e 100644 --- a/bacula/src/stored/askdir.c +++ b/bacula/src/stored/askdir.c @@ -446,6 +446,7 @@ bool dir_update_volume_info(DCR *dcr, bool label, bool update_LastWritten, /* This happens when nothing to update after fixup_device ... */ if (vol.VolCatName[0] == 0) { + Dmsg0(50, "Volume Name is NULL\n"); goto bail_out; } Dmsg4(100, "Update cat VolBytes=%lld VolABytes=%lld Status=%s Vol=%s\n", diff --git a/bacula/src/stored/dev.h b/bacula/src/stored/dev.h index 412205b1d..480e1033e 100644 --- a/bacula/src/stored/dev.h +++ b/bacula/src/stored/dev.h @@ -625,6 +625,10 @@ public: virtual void dblock(int why); /* in lock.c */ virtual void dunblock(bool locked=false); /* in lock.c */ + /* use obtain_device_block() macro */ + bool _obtain_device_block(const char *file, int line, + bsteal_lock_t *hold, int retry, int state); + int init_mutex(); /* in lock.c */ int init_acquire_mutex(); /* in lock.c */ diff --git a/bacula/src/stored/dircmd.c b/bacula/src/stored/dircmd.c index dba2491b7..2dafa645c 100644 --- a/bacula/src/stored/dircmd.c +++ b/bacula/src/stored/dircmd.c @@ -913,10 +913,15 @@ static void label_volume_if_ok(DCR *dcr, char *oldname, const char *volname = (relabel == 1) ? oldname : newname; uint64_t volCatBytes; - if (!obtain_device_block(dev, &hold, BST_WRITING_LABEL)) { + if (!obtain_device_block(dev, + &hold, + 1, /* one try */ + BST_WRITING_LABEL)) { send_dir_busy_message(dir, dev); return; } + dev->Unlock(); + Dmsg1(100, "Stole device %s lock, writing label.\n", dev->print_name()); Dmsg0(90, "try_autoload_device - looking for volume_info\n"); @@ -1026,6 +1031,7 @@ bail_out: dev->clear_volhdr(); } volume_unused(dcr); /* no longer using volume */ + dev->Lock(); give_back_device_block(dev, &hold); return; } @@ -1044,11 +1050,14 @@ static bool read_label(DCR *dcr) bsteal_lock_t hold; DEVICE *dev = dcr->dev; - if (!obtain_device_block(dev, &hold, BST_DOING_ACQUIRE)) { + if (!obtain_device_block(dev, + &hold, + 1 /* one try */, + BST_DOING_ACQUIRE)) { send_dir_busy_message(dir, dev); return false; } - + dev->Unlock(); dcr->VolumeName[0] = 0; dev->clear_labeled(); /* force read of label */ switch (dev->read_dev_volume_label(dcr)) { @@ -1063,6 +1072,7 @@ static bool read_label(DCR *dcr) break; } volume_unused(dcr); + dev->Lock(); give_back_device_block(dev, &hold); return ok; } @@ -1831,10 +1841,14 @@ static void read_volume_label(JCR *jcr, DCR *dcr, DEVICE *dev, int Slot) bsteal_lock_t hold; dcr->set_dev(dev); - if (!obtain_device_block(dev, &hold, BST_WRITING_LABEL)) { + if (!obtain_device_block(dev, + &hold, + 1 /* one try */, + BST_WRITING_LABEL)) { send_dir_busy_message(dir, dev); return; } + dev->Unlock(); if (!try_autoload_device(jcr, dcr, Slot, "")) { goto bail_out; /* error */ @@ -1854,6 +1868,7 @@ static void read_volume_label(JCR *jcr, DCR *dcr, DEVICE *dev, int Slot) } bail_out: + dev->Lock(); give_back_device_block(dev, &hold); return; } diff --git a/bacula/src/stored/lock.c b/bacula/src/stored/lock.c index 0c485a7d3..ef808bba5 100644 --- a/bacula/src/stored/lock.c +++ b/bacula/src/stored/lock.c @@ -27,9 +27,9 @@ #include "stored.h" /* pull in Storage Deamon headers */ #ifdef SD_DEBUG_LOCK -const int dbglvl = 300; +const int dbglvl = DT_LOCK|30; #else -const int dbglvl = 500; +const int dbglvl = DT_LOCK|50; #endif @@ -117,10 +117,8 @@ const int dbglvl = 500; * save status * set new blocked status * set new pid - * Unlock() * - * give_back_device_block() does (must be blocked but not locked) - * Lock() + * give_back_device_block() does (must be blocked and locked) * reset blocked status * save previous blocked * reset pid @@ -432,43 +430,59 @@ void _unblock_device(const char *file, int line, DEVICE *dev) static pthread_mutex_t block_mutex = PTHREAD_MUTEX_INITIALIZER; /* - * Enter with device locked + * Enter and leave with device locked * * Note: actually this routine: * returns true if it can either set or steal the device block * returns false if it cannot block the device */ -bool _obtain_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state) +bool DEVICE::_obtain_device_block(const char *file, int line, + bsteal_lock_t *hold, int retry, int state) { + int ret; + int r = retry; + + if (!can_obtain_block() && !pthread_equal(no_wait_id, pthread_self())) { + num_waiting++; /* indicate that I am waiting */ + while ((retry == 0 || r-- > 0) && !can_obtain_block()) { + if ((ret = bthread_cond_wait_p(&wait, &m_mutex, file, line)) != 0) { + berrno be; + Emsg1(M_ABORT, 0, _("pthread_cond_wait failure. ERR=%s\n"), + be.bstrerror(ret)); + } + } + num_waiting--; /* no longer waiting */ + } + P(block_mutex); Dmsg4(sd_dbglvl, "Steal lock %s old=%s from %s:%d\n", - dev->device->hdr.name, dev->print_blocked(), file, line); - if (!dev->can_obtain_block()) { + device->hdr.name, print_blocked(), file, line); + + if (!can_obtain_block() && !pthread_equal(no_wait_id, pthread_self())) { V(block_mutex); return false; } - hold->dev_blocked = dev->blocked(); - hold->dev_prev_blocked = dev->dev_prev_blocked; - hold->no_wait_id = dev->no_wait_id; - hold->blocked_by = dev->blocked_by; - dev->set_blocked(state); - Dmsg1(sd_dbglvl, "steal block. new=%s\n", dev->print_blocked()); - dev->no_wait_id = pthread_self(); - dev->blocked_by = get_jobid_from_tsd(); + hold->dev_blocked = blocked(); + hold->dev_prev_blocked = dev_prev_blocked; + hold->no_wait_id = no_wait_id; + hold->blocked_by = blocked_by; + set_blocked(state); + Dmsg1(sd_dbglvl, "steal block. new=%s\n", print_blocked()); + no_wait_id = pthread_self(); + blocked_by = get_jobid_from_tsd(); V(block_mutex); - dev->Unlock(); return true; } /* - * Enter with device blocked by us but not locked + * Enter with device blocked and locked by us * Exit with device locked, and blocked by previous owner */ -void _give_back_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold) +void _give_back_device_block(const char *file, int line, + DEVICE *dev, bsteal_lock_t *hold) { Dmsg4(sd_dbglvl, "Return lock %s old=%s from %s:%d\n", dev->device->hdr.name, dev->print_blocked(), file, line); - dev->Lock(); P(block_mutex); dev->set_blocked(hold->dev_blocked); dev->dev_prev_blocked = hold->dev_prev_blocked; diff --git a/bacula/src/stored/lock.h b/bacula/src/stored/lock.h index 5742f3db7..77140c7d9 100644 --- a/bacula/src/stored/lock.h +++ b/bacula/src/stored/lock.h @@ -88,7 +88,8 @@ void _unlock_volumes(); #define block_device(d, s) _block_device(__FILE__, __LINE__, (d), s) #define unblock_device(d) _unblock_device(__FILE__, __LINE__, (d)) -#define obtain_device_block(d, p, s) _obtain_device_block(__FILE__, __LINE__, (d), (p), s) + +#define obtain_device_block(d, p, r, s) (d)->_obtain_device_block(__FILE__, __LINE__, (p), (r), (s)) #define give_back_device_block(d, p) _give_back_device_block(__FILE__, __LINE__, (d), (p)) /* m_blocked states (mutually exclusive) */ diff --git a/bacula/src/stored/protos.h b/bacula/src/stored/protos.h index 7c7cddbc6..27edb128e 100644 --- a/bacula/src/stored/protos.h +++ b/bacula/src/stored/protos.h @@ -189,7 +189,6 @@ void _lock_device(const char *file, int line, DEVICE *dev); void _unlock_device(const char *file, int line, DEVICE *dev); void _block_device(const char *file, int line, DEVICE *dev, int state); void _unblock_device(const char *file, int line, DEVICE *dev); -bool _obtain_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state); void _give_back_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold);