]> git.ipfire.org Git - thirdparty/bacula.git/commitdiff
Fix concurrent acquire/release of device
authorEric Bollengier <eric@baculasystems.com>
Sun, 15 Jul 2018 06:32:18 +0000 (08:32 +0200)
committerKern Sibbald <kern@sibbald.com>
Sun, 15 Jul 2018 06:32:18 +0000 (08:32 +0200)
bacula/src/stored/acquire.c
bacula/src/stored/askdir.c
bacula/src/stored/dev.h
bacula/src/stored/dircmd.c
bacula/src/stored/lock.c
bacula/src/stored/lock.h
bacula/src/stored/protos.h

index e68a57cf834ef2c872eaba215759f18b7222a240..cec9295d9f9b6c13fc40524637e0f2865bf5d625 100644 (file)
@@ -482,17 +482,16 @@ bool release_device(DCR *dcr)
    DEVICE *dev = dcr->dev;
    bool ok = true;
    char tbuf[100];
-   int was_blocked;
-
+   bsteal_lock_t holder;
 
    dev->Lock();
-   was_blocked = BST_NOT_BLOCKED;
-   if (!dev->is_blocked()) {
-      block_device(dev, BST_RELEASING);
-   } else {
-      was_blocked = dev->blocked();
-      dev->set_blocked(BST_RELEASING);
+   if (!obtain_device_block(dev,
+                            &holder,
+                            0,  /* infinite wait */
+                            BST_RELEASING)) {
+      ASSERT2(0, "unable to obtain device block");
    }
+
    lock_volumes();
    Dmsg2(100, "release_device device %s is %s\n", dev->print_name(), dev->is_tape()?"tape":"disk");
 
@@ -578,15 +577,13 @@ bool release_device(DCR *dcr)
          (uint32_t)jcr->JobId, bstrftimes(tbuf, sizeof(tbuf), (utime_t)time(NULL)));
    pthread_cond_broadcast(&wait_device_release);
 
-
+   give_back_device_block(dev, &holder);
    /*
     * If we are the thread that blocked the device, then unblock it
     */
    if (pthread_equal(dev->no_wait_id, pthread_self())) {
       dev->dunblock(true);
    } else {
-      /* Otherwise, reset the prior block status and unlock */
-      dev->set_blocked(was_blocked);
       dev->Unlock();
    }
 
index b03c5d927d512f504994c119fa29fb4a4193a525..c0a7d906e631c8efedad3ad49784b4ea3be8809d 100644 (file)
@@ -446,6 +446,7 @@ bool dir_update_volume_info(DCR *dcr, bool label, bool update_LastWritten,
 
    /* This happens when nothing to update after fixup_device ... */
    if (vol.VolCatName[0] == 0) {
+      Dmsg0(50, "Volume Name is NULL\n");
       goto bail_out;
    }
    Dmsg4(100, "Update cat VolBytes=%lld VolABytes=%lld Status=%s Vol=%s\n",
index 412205b1d6428f0063469df73d3d797dc6ca2833..480e1033e10fdba9af8089540b19cd1116924e8b 100644 (file)
@@ -625,6 +625,10 @@ public:
    virtual void dblock(int why);                  /* in lock.c */
    virtual void dunblock(bool locked=false);      /* in lock.c */
 
+   /* use obtain_device_block() macro */
+   bool _obtain_device_block(const char *file, int line,
+                             bsteal_lock_t *hold, int retry, int state);
+
 
    int init_mutex();                      /* in lock.c */
    int init_acquire_mutex();              /* in lock.c */
index dba2491b76af465adf6a48392eca59b07c6685bd..2dafa645c28ca3eae6db493043b7204172d43693 100644 (file)
@@ -913,10 +913,15 @@ static void label_volume_if_ok(DCR *dcr, char *oldname,
    const char *volname = (relabel == 1) ? oldname : newname;
    uint64_t volCatBytes;
 
-   if (!obtain_device_block(dev, &hold, BST_WRITING_LABEL)) {
+   if (!obtain_device_block(dev,
+                            &hold,
+                            1,     /* one try */
+                            BST_WRITING_LABEL)) {
       send_dir_busy_message(dir, dev);
       return;
    }
+   dev->Unlock();
+
    Dmsg1(100, "Stole device %s lock, writing label.\n", dev->print_name());
 
    Dmsg0(90, "try_autoload_device - looking for volume_info\n");
@@ -1026,6 +1031,7 @@ bail_out:
       dev->clear_volhdr();
    }
    volume_unused(dcr);                   /* no longer using volume */
+   dev->Lock();
    give_back_device_block(dev, &hold);
    return;
 }
@@ -1044,11 +1050,14 @@ static bool read_label(DCR *dcr)
    bsteal_lock_t hold;
    DEVICE *dev = dcr->dev;
 
-   if (!obtain_device_block(dev, &hold, BST_DOING_ACQUIRE)) {
+   if (!obtain_device_block(dev,
+                            &hold,
+                            1 /* one try */,
+                            BST_DOING_ACQUIRE)) {
       send_dir_busy_message(dir, dev);
       return false;
    }
-
+   dev->Unlock();
    dcr->VolumeName[0] = 0;
    dev->clear_labeled();              /* force read of label */
    switch (dev->read_dev_volume_label(dcr)) {
@@ -1063,6 +1072,7 @@ static bool read_label(DCR *dcr)
       break;
    }
    volume_unused(dcr);
+   dev->Lock();
    give_back_device_block(dev, &hold);
    return ok;
 }
@@ -1831,10 +1841,14 @@ static void read_volume_label(JCR *jcr, DCR *dcr, DEVICE *dev, int Slot)
    bsteal_lock_t hold;
 
    dcr->set_dev(dev);
-   if (!obtain_device_block(dev, &hold, BST_WRITING_LABEL)) {
+   if (!obtain_device_block(dev,
+                            &hold,
+                            1 /* one try */,
+                            BST_WRITING_LABEL)) {
       send_dir_busy_message(dir, dev);
       return;
    }
+   dev->Unlock();
 
    if (!try_autoload_device(jcr, dcr, Slot, "")) {
       goto bail_out;                  /* error */
@@ -1854,6 +1868,7 @@ static void read_volume_label(JCR *jcr, DCR *dcr, DEVICE *dev, int Slot)
    }
 
 bail_out:
+   dev->Lock();
    give_back_device_block(dev, &hold);
    return;
 }
index 0c485a7d35cb186082d63b77affd4b163d3a12a4..ef808bba559689db6bfc22aa3b3b0db03ef339ba 100644 (file)
@@ -27,9 +27,9 @@
 #include "stored.h"                   /* pull in Storage Deamon headers */
 
 #ifdef SD_DEBUG_LOCK
-const int dbglvl = 300;
+const int dbglvl = DT_LOCK|30;
 #else
-const int dbglvl = 500;
+const int dbglvl = DT_LOCK|50;
 #endif
 
 
@@ -117,10 +117,8 @@ const int dbglvl = 500;
  *                    save status
  *                    set new blocked status
  *                    set new pid
- *                    Unlock()
  *
- *   give_back_device_block() does (must be blocked but not locked)
- *                    Lock()
+ *   give_back_device_block() does (must be blocked and locked)
  *                    reset blocked status
  *                    save previous blocked
  *                    reset pid
@@ -432,43 +430,59 @@ void _unblock_device(const char *file, int line, DEVICE *dev)
 
 static pthread_mutex_t block_mutex = PTHREAD_MUTEX_INITIALIZER;
 /*
- * Enter with device locked
+ * Enter and leave with device locked
  *
  * Note: actually this routine:
  *   returns true if it can either set or steal the device block
  *   returns false if it cannot block the device
  */
-bool _obtain_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state)
+bool DEVICE::_obtain_device_block(const char *file, int line,
+                                  bsteal_lock_t *hold, int retry, int state)
 {
+   int ret;
+   int r = retry;
+
+   if (!can_obtain_block() && !pthread_equal(no_wait_id, pthread_self())) {
+      num_waiting++;             /* indicate that I am waiting */
+      while ((retry == 0 || r-- > 0) && !can_obtain_block()) {
+         if ((ret = bthread_cond_wait_p(&wait, &m_mutex, file, line)) != 0) {
+            berrno be;
+            Emsg1(M_ABORT, 0, _("pthread_cond_wait failure. ERR=%s\n"),
+                  be.bstrerror(ret));
+         }
+      }
+      num_waiting--;             /* no longer waiting */
+   }
+
    P(block_mutex);
    Dmsg4(sd_dbglvl, "Steal lock %s old=%s from %s:%d\n",
-      dev->device->hdr.name, dev->print_blocked(), file, line);
-   if (!dev->can_obtain_block()) {
+      device->hdr.name, print_blocked(), file, line);
+
+   if (!can_obtain_block() && !pthread_equal(no_wait_id, pthread_self())) { 
       V(block_mutex);
       return false;
    }
-   hold->dev_blocked = dev->blocked();
-   hold->dev_prev_blocked = dev->dev_prev_blocked;
-   hold->no_wait_id = dev->no_wait_id;
-   hold->blocked_by = dev->blocked_by;
-   dev->set_blocked(state);
-   Dmsg1(sd_dbglvl, "steal block. new=%s\n", dev->print_blocked());
-   dev->no_wait_id = pthread_self();
-   dev->blocked_by = get_jobid_from_tsd();
+   hold->dev_blocked = blocked();
+   hold->dev_prev_blocked = dev_prev_blocked;
+   hold->no_wait_id = no_wait_id;
+   hold->blocked_by = blocked_by;
+   set_blocked(state);
+   Dmsg1(sd_dbglvl, "steal block. new=%s\n", print_blocked());
+   no_wait_id = pthread_self();
+   blocked_by = get_jobid_from_tsd();
    V(block_mutex);
-   dev->Unlock();
    return true;
 }
 
 /*
- * Enter with device blocked by us but not locked
+ * Enter with device blocked and locked by us
  * Exit with device locked, and blocked by previous owner
  */
-void _give_back_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold)
+void _give_back_device_block(const char *file, int line,
+                             DEVICE *dev, bsteal_lock_t *hold)
 {
    Dmsg4(sd_dbglvl, "Return lock %s old=%s from %s:%d\n",
       dev->device->hdr.name, dev->print_blocked(), file, line);
-   dev->Lock();
    P(block_mutex);
    dev->set_blocked(hold->dev_blocked);
    dev->dev_prev_blocked = hold->dev_prev_blocked;
index 5742f3db7373ae7c253f5cf608fd3089798aa591..77140c7d9d1055e9bdfe07ac6f797a830073e4f8 100644 (file)
@@ -88,7 +88,8 @@ void    _unlock_volumes();
 
 #define block_device(d, s)          _block_device(__FILE__, __LINE__, (d), s)
 #define unblock_device(d)           _unblock_device(__FILE__, __LINE__, (d))
-#define obtain_device_block(d, p, s)  _obtain_device_block(__FILE__, __LINE__, (d), (p), s)
+
+#define obtain_device_block(d, p, r, s) (d)->_obtain_device_block(__FILE__, __LINE__, (p), (r), (s))
 #define give_back_device_block(d, p) _give_back_device_block(__FILE__, __LINE__, (d), (p))
 
 /* m_blocked states (mutually exclusive) */
index 7c7cddbc66e2a2244115f73b78a37048316bccba..27edb128e9ebb4e1849b4e16a0382aadce2386c7 100644 (file)
@@ -189,7 +189,6 @@ void     _lock_device(const char *file, int line, DEVICE *dev);
 void     _unlock_device(const char *file, int line, DEVICE *dev);
 void     _block_device(const char *file, int line, DEVICE *dev, int state);
 void     _unblock_device(const char *file, int line, DEVICE *dev);
-bool     _obtain_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state);
 void     _give_back_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold);