From: Alan Jenkins Date: Fri, 7 Jul 2017 10:16:06 +0000 (+0100) Subject: add read locking through rrd_open() (#800) X-Git-Tag: v1.7.1~121 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=248671a941dad879cc90dfe081a1ea77cc34aec1;p=thirdparty%2Frrdtool-1.x.git add read locking through rrd_open() (#800) * fix cleanup in rrd_open() * rados * don't leak rrd_file->rados * don't mysteriously close STDIN * !HAVE_MMAP * don't leak rrd->stat_head I have no explanation for why we were chosing to leak rrd->stat_head (and only rrd->stat_head). If it _was_ required for some reason, there was a bug anyway, as we did not leak it if we fail to allocate live_hdr. If it's required by legacy ABI, then we should remove rrd_open() from the public API (and bump the soname). It was originally scheduled for removal in v1.4. * avoid unmapping pages that don't belong to us in rrd_close() When a rados RRD is closed, don't `munmap(0, rrd_file->file_len)` ! Note munmap() does not return an error when there were no pages mapped, so our callers wouldn't notice even if they check the result of rrd_close(). Also fix the code for munmap errors - so it doesn't format one message and then immediately replace it with a less specific one. (If both munmap() and close() fails though, we will describe the later error only). Also, there's no point rrd_close() setting rrd_file to NULL at the end, it won't affect the variable used by the caller. * Add read locking through rrd_open() Read lock or write lock is taken according to whether RRD_READONLY or RRD_READWRITE is passed. For librrd compat, the lock is only taken when explicitly requested by passing RRD_LOCK. Similarly rrd_lock() is still available, but it should not be needed by new code. So `RRD_LOCK|RRD_READONLY` is intended to take a read lock, however I must admit to limitations of this new code: * On rados, our write lock expires after 2 seconds. This write-lock is probably useful as a way to detect conflicts. However I would not want to rely on a lock that works like this to guarantee consistent reads for a backup. I did not add read locking on rados. If rados users need to e.g. obtain a consistent backup, they should create a rados snapshot. (This is able to work because we use a single atomic rados operation when when we *write* to RRDs). * On the Windows _locking() function, read locks are treated the same as write locks. https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/locking The Windows code required several changes, and has not been compiled as of this moment. With Windows _locking(), we should have been unlocking before close(). Apparently the automatic unlock on close() is not guaranteed to occur synchronously. This isn't specific to _locking(), it's a limitation of winapi LockFileEx(). https://docs.microsoft.com/cpp/c-runtime-library/reference/locking https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx To manually unlock, we need to know what we actually locked. But on Windows, rrd_lock() was locking the bytes starting at the current rrd_file->pos, instead of always starting from 0. Also, I thought RRD_LOCK should lock the file immediately, to guarantee we don't have conflicting writes. But on Windows we only tried to lock the bytes that already existed in the file... which would be 0 bytes for RRD_CREATE. Fortunately, even Windows recognizes this problem and allows locking bytes that don't exist yet. So we can just lock the maximum number of bytes allowed. Locking 1 byte would probably work equally well for us, but using LONG_MAX seems cleaner to me. --- diff --git a/src/rrd_create.c b/src/rrd_create.c index 7be6bcf7..e6226014 100644 --- a/src/rrd_create.c +++ b/src/rrd_create.c @@ -842,7 +842,8 @@ int rrd_create_r2( rrd_t trrd; rrd_init(&trrd); - rrd_file_t *tf = rrd_open(template, &trrd, RRD_READONLY | RRD_READAHEAD | RRD_READVALUES); + rrd_file_t *tf = rrd_open(template, &trrd, RRD_READONLY | RRD_LOCK | + RRD_READAHEAD | RRD_READVALUES); if (tf == NULL) { rrd_set_error("Cannot open template RRD %s", template); goto done; @@ -989,8 +990,8 @@ int rrd_create_r2( } rrd_init(srrd); - rrd_file_t *sf = rrd_open(*s, srrd, RRD_READONLY | RRD_READAHEAD | RRD_READVALUES); - + rrd_file_t *sf = rrd_open(*s, srrd, RRD_READONLY | RRD_LOCK | + RRD_READAHEAD | RRD_READVALUES); if (sf == NULL) { rrd_set_error("Cannot open source RRD %s", *s); goto done; diff --git a/src/rrd_daemon.c b/src/rrd_daemon.c index d33a057e..c37865df 100644 --- a/src/rrd_daemon.c +++ b/src/rrd_daemon.c @@ -1665,7 +1665,7 @@ static int handle_request_update (HANDLER_PROTO) /* {{{ */ rrd_clear_error(); rrd_init(&rrd); - rrd_file = rrd_open(file, &rrd, RRD_READONLY); + rrd_file = rrd_open(file, &rrd, RRD_READONLY | RRD_LOCK); if (!rrd_file) { rrd_free(&rrd); @@ -2276,7 +2276,7 @@ static int handle_request_last (HANDLER_PROTO) /* {{{ */ } rrd_clear_error(); rrd_init(&rrd); - rrd_file = rrd_open(file,&rrd,RRD_READONLY); + rrd_file = rrd_open(file, &rrd, RRD_READONLY | RRD_LOCK); if(!rrd_file) { rrd_free(&rrd); rc = send_response(sock, RESP_ERR, "RRD Error: %s\n", rrd_get_error()); diff --git a/src/rrd_dump.c b/src/rrd_dump.c index 3653f012..69bbdf11 100644 --- a/src/rrd_dump.c +++ b/src/rrd_dump.c @@ -90,7 +90,8 @@ int rrd_dump_cb_r( rrd_init(&rrd); - rrd_file = rrd_open(filename, &rrd, RRD_READONLY | RRD_READAHEAD); + rrd_file = rrd_open(filename, &rrd, RRD_READONLY | RRD_LOCK | + RRD_READAHEAD); if (rrd_file == NULL) { rrd_free(&rrd); return (-1); diff --git a/src/rrd_fetch.c b/src/rrd_fetch.c index 1c3e7f5c..0fca39a6 100644 --- a/src/rrd_fetch.c +++ b/src/rrd_fetch.c @@ -318,7 +318,7 @@ int rrd_fetch_fn( } rrd_init(&rrd); - rrd_file = rrd_open(filename, &rrd, RRD_READONLY); + rrd_file = rrd_open(filename, &rrd, RRD_READONLY | RRD_LOCK); if (rrd_file == NULL) goto err_free; diff --git a/src/rrd_first.c b/src/rrd_first.c index 7beb5842..bc99cb4b 100644 --- a/src/rrd_first.c +++ b/src/rrd_first.c @@ -91,7 +91,7 @@ time_t rrd_first_r( rrd_file_t *rrd_file; rrd_init(&rrd); - rrd_file = rrd_open(filename, &rrd, RRD_READONLY); + rrd_file = rrd_open(filename, &rrd, RRD_READONLY | RRD_LOCK); if (rrd_file == NULL) { goto err_free; } diff --git a/src/rrd_info.c b/src/rrd_info.c index dc04c5c6..8d2a01a5 100644 --- a/src/rrd_info.c +++ b/src/rrd_info.c @@ -164,7 +164,7 @@ rrd_info_t *rrd_info_r( enum dst_en current_ds; rrd_init(&rrd); - rrd_file = rrd_open(filename, &rrd, RRD_READONLY); + rrd_file = rrd_open(filename, &rrd, RRD_READONLY | RRD_LOCK); if (rrd_file == NULL) goto err_free; diff --git a/src/rrd_last.c b/src/rrd_last.c index d5267684..0e3d9a74 100644 --- a/src/rrd_last.c +++ b/src/rrd_last.c @@ -77,7 +77,7 @@ time_t rrd_last_r( rrd_t rrd; rrd_init(&rrd); - rrd_file = rrd_open(filename, &rrd, RRD_READONLY); + rrd_file = rrd_open(filename, &rrd, RRD_READONLY | RRD_LOCK); if (rrd_file != NULL) { lastup = rrd.live_head->last_up; rrd_close(rrd_file); diff --git a/src/rrd_lastupdate.c b/src/rrd_lastupdate.c index 5f7aefcc..7fe3b937 100644 --- a/src/rrd_lastupdate.c +++ b/src/rrd_lastupdate.c @@ -98,7 +98,7 @@ int rrd_lastupdate_r(const char *filename, rrd_file_t *rrd_file; rrd_init(&rrd); - rrd_file = rrd_open(filename, &rrd, RRD_READONLY); + rrd_file = rrd_open(filename, &rrd, RRD_READONLY | RRD_LOCK); if (rrd_file == NULL) { rrd_free(&rrd); return (-1); diff --git a/src/rrd_open.c b/src/rrd_open.c index fef5482e..0114f058 100644 --- a/src/rrd_open.c +++ b/src/rrd_open.c @@ -6,15 +6,21 @@ * $Id$ *****************************************************************************/ -#include "rrd_tool.h" -#include "unused.h" - #ifdef WIN32 +#include +#if _WIN32_MAXVER >= 0x0602 /* _WIN32_WINNT_WIN8 */ +#include +#endif + #include +#include #include #include -#endif +#include +#endif /* WIN32 */ +#include "rrd_tool.h" +#include "unused.h" #ifdef HAVE_BROKEN_MS_ASYNC #include @@ -31,8 +37,8 @@ #define _LK_UNLCK 0 /* Unlock */ #define _LK_LOCK 1 /* Lock */ #define _LK_NBLCK 2 /* Non-blocking lock */ -#define _LK_RLCK 3 /* Lock for read only */ -#define _LK_NBRLCK 4 /* Non-blocking lock for read only */ +#define _LK_RLCK 3 /* "Same as _LK_NBLCK" */ +#define _LK_NBRLCK 4 /* "Same as _LK_LOCK" */ #define LK_UNLCK _LK_UNLCK @@ -57,7 +63,7 @@ size_t wanted = sizeof(dst_t)*(cnt); \ if (offset + wanted > rrd_file->file_len) { \ rrd_set_error("reached EOF while loading header " #dst); \ - goto out_nullify_head; \ + goto out_close; \ } \ (dst) = (dst_t*)(void*) (data + offset); \ offset += wanted; \ @@ -68,12 +74,12 @@ size_t got; \ if ((dst = (dst_t*)malloc(wanted)) == NULL) { \ rrd_set_error(#dst " malloc"); \ - goto out_nullify_head; \ + goto out_close; \ } \ got = read (rrd_simple_file->fd, dst, wanted); \ if (got != wanted) { \ rrd_set_error("short read while reading header " #dst); \ - goto out_nullify_head; \ + goto out_close; \ } \ offset += got; \ } @@ -85,12 +91,12 @@ size_t got; \ if ((dst = (dst_t*)malloc(wanted)) == NULL) { \ rrd_set_error(#dst " malloc"); \ - goto out_nullify_head; \ + goto out_close; \ } \ got = rrd_rados_read(rrd_file->rados, dst, wanted, offset); \ if (got != wanted) { \ rrd_set_error("short read while reading header " #dst); \ - goto out_nullify_head; \ + goto out_close; \ } \ offset += got; \ } @@ -124,6 +130,9 @@ #endif #endif +static int rrd_rwlock(rrd_file_t *rrd_file, int writelock); +static int close_and_unlock(int fd); + /* Open a database file, return its header and an open filehandle, * positioned to the first cdp in the first rra. * In the error path of rrd_open, only rrd_free(&rrd) has to be called @@ -183,6 +192,7 @@ rrd_file_t *rrd_open( } memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t)); rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt; + rrd_simple_file->fd = -1; #ifdef DEBUG if ((rdwr & (RRD_READONLY | RRD_READWRITE)) == @@ -200,6 +210,14 @@ rrd_file_t *rrd_open( if (rrd_file->rados == NULL) goto out_free; + if (rdwr & RRD_LOCK) { + /* Note: rados read lock is not implemented. See rrd_lock(). */ + if (rrd_rwlock(rrd_file, rdwr & RRD_READWRITE) != 0) { + rrd_set_error("could not lock RRD"); + goto out_close; + } + } + if (rdwr & RRD_CREAT) goto out_done; @@ -269,6 +287,13 @@ rrd_file_t *rrd_open( #endif #endif + if (rdwr & RRD_LOCK) { + if (rrd_rwlock(rrd_file, rdwr & RRD_READWRITE) != 0) { + rrd_set_error("could not lock RRD"); + goto out_close; + } + } + /* Better try to avoid seeks as much as possible. stat may be heavy but * many concurrent seeks are even worse. */ if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) { @@ -396,12 +421,12 @@ read_check: /* lets do some test if we are on track ... */ if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) { rrd_set_error("'%s' is not an RRD file", file_name); - goto out_nullify_head; + goto out_close; } if (rrd->stat_head->float_cookie != FLOAT_COOKIE) { rrd_set_error("This RRD was created on another architecture"); - goto out_nullify_head; + goto out_close; } version = atoi(rrd->stat_head->version); @@ -409,7 +434,7 @@ read_check: if (version > atoi(RRD_VERSION5)) { rrd_set_error("can't handle RRD file version %s", rrd->stat_head->version); - goto out_nullify_head; + goto out_close; } __rrd_read(rrd->ds_def, ds_def_t, rrd->stat_head->ds_cnt); @@ -478,7 +503,7 @@ read_check: { rrd_set_error("'%s' is too small (should be %ld bytes)", file_name, (long long) correct_len); - goto out_nullify_head; + goto out_close; } if (rdwr & RRD_READVALUES) { long d_offset = offset; @@ -492,19 +517,30 @@ read_check: } - - out_done: return (rrd_file); - out_nullify_head: - rrd->stat_head = NULL; + out_close: #ifdef HAVE_MMAP if (data != MAP_FAILED) munmap(data, rrd_file->file_len); #endif +#ifdef HAVE_LIBRADOS + if (rrd_file->rados) + rrd_rados_close(rrd_file->rados); +#endif + if (rrd_simple_file->fd >= 0) { + /* keep the original error */ + char *e = strdup(rrd_get_error()); + + close_and_unlock(rrd_simple_file->fd); - close(rrd_simple_file->fd); + if (e) { + rrd_set_error(e); + free(e); + } else + rrd_set_error("error message was lost (out of memory)"); + } out_free: free(rrd_file->pvt); free(rrd_file); @@ -567,38 +603,147 @@ void mincore_print( */ int rrd_lock( rrd_file_t *rrd_file) +{ + return rrd_rwlock(rrd_file, 1); +} + +#if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__) +#define USE_WINDOWS_LOCK 1 +#endif + +#ifdef USE_WINDOWS_LOCK +static +int rrd_windows_lock( + int fd) +{ + int ret; + long pos; + + /* + * _locking() is relative to fd position. + * We need to consistently lock bytes starting from 0, + * so we can successfully unlock on close. + * + * Note rrd_lock() API doesn't set a specific error message. + * Knowing that rrd_lock() (or even rrd_open()) failed should + * be specific enough, if someone manages to invoke rrdtool + * on something silly like a named pipe or COM1. + */ + pos = tell(fd); + if (pos < 0) + return -1; + + if (lseek(fd, 0, SEEK_SET) < 0) + return -1; + + while (1) { + ret = _locking(fd, _LK_NBLCK, LONG_MAX); + if (ret == 0) + break; /* success */ + if (errno != EACCES) + break; /* failure */ + /* EACCES: someone else has the lock. */ + + /* + * Wait 0.01 seconds before trying again. _locking() + * with _LK_LOCK would work similarly but waits 1 second + * between tries, which seems less desirable. + */ + Sleep(10); + } + + /* restore saved fd position */ + if (lseek(fd, pos, SEEK_SET) < 0) + return -1; + + return ret; +} +#endif + +static +int close_and_unlock( + int fd) +{ + int ret = 0; + +#ifdef USE_WINDOWS_LOCK + /* + * "If a process closes a file that has outstanding locks, the locks are + * unlocked by the operating system. However, the time it takes for the + * operating system to unlock these locks depends upon available system + * resources. Therefore, it is recommended that your process explicitly + * unlock all files it has locked when it terminates." (?!) + */ + + if (lseek(fd, 0, SEEK_SET) < 0) { + rrd_set_error("lseek: %s", rrd_strerror(errno)); + ret = -1; + goto out_close; + } + + ret = _locking(fd, LK_UNLCK, LONG_MAX); + if (ret != 0 && errno == EACCES) + /* fd was not locked - this is entirely possible, ignore the error */ + ret = 0; + + if (ret != 0) + rrd_set_error("unlock file: %s", rrd_strerror(errno)); +out_close: +#endif + + if (close(fd) != 0) { + ret = -1; + rrd_set_error("closing file: %s", rrd_strerror(errno)); + } + + return ret; +} + +static +int rrd_rwlock( + rrd_file_t *rrd_file, + int writelock) { #ifdef DISABLE_FLOCK (void)rrd_file; return 0; #else #ifdef HAVE_LIBRADOS - if (rrd_file->rados) - return rrd_rados_lock(rrd_file->rados); + if (rrd_file->rados) { + /* + * No read lock on rados. It would be complicated by the + * use of a short finite lock duration in rrd_rados_lock(). + * Also rados does not provide blocking locks. + * + * Rados users may use snapshots if they need to + * e.g. obtain a consistent backup. + */ + if (writelock) + return rrd_rados_lock(rrd_file->rados); + else + return 0; + } #endif int rcstat; rrd_simple_file_t *rrd_simple_file; rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt; - { -#if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__) - struct _stat st; - - if (_fstat(rrd_simple_file->fd, &st) == 0) { - rcstat = _locking(rrd_simple_file->fd, _LK_NBLCK, st.st_size); - } else { - rcstat = -1; - } +#ifdef USE_WINDOWS_LOCK + /* _locking() does not support read locks; we always take a write lock */ + rcstat = rrd_windows_lock(rrd_simple_file->fd); #else + { struct flock lock; - lock.l_type = F_WRLCK; /* exclusive write lock */ + lock.l_type = writelock ? + F_WRLCK: /* exclusive write lock or */ + F_RDLCK; /* shared read lock */ lock.l_len = 0; /* whole file */ lock.l_start = 0; /* start of file */ lock.l_whence = SEEK_SET; /* end of file */ rcstat = fcntl(rrd_simple_file->fd, F_SETLK, &lock); -#endif } +#endif return (rcstat); #endif @@ -692,25 +837,28 @@ int rrd_close( { rrd_simple_file_t *rrd_simple_file; rrd_simple_file = (rrd_simple_file_t *)rrd_file->pvt; - int ret; + int ret = 0; -#ifdef HAVE_MMAP - ret = munmap(rrd_simple_file->file_start, rrd_file->file_len); - if (ret != 0) - rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno)); -#endif #ifdef HAVE_LIBRADOS - if (rrd_file->rados) - ret = rrd_rados_close(rrd_file->rados); - else + if (rrd_file->rados) { + if (rrd_rados_close(rrd_file->rados) != 0) + ret = -1; + } #endif - ret = close(rrd_simple_file->fd); - - if (ret != 0) - rrd_set_error("closing file: %s", rrd_strerror(errno)); +#ifdef HAVE_MMAP + if (rrd_simple_file->file_start != NULL) { + if (munmap(rrd_simple_file->file_start, rrd_file->file_len) != 0) { + ret = -1; + rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno)); + } + } +#endif + if (rrd_simple_file->fd >= 0) { + if (close_and_unlock(rrd_simple_file->fd) != 0) + ret = -1; + } free(rrd_file->pvt); free(rrd_file); - rrd_file = NULL; return ret; } diff --git a/src/rrd_resize.c b/src/rrd_resize.c index a7525cc4..03b7938c 100644 --- a/src/rrd_resize.c +++ b/src/rrd_resize.c @@ -58,20 +58,12 @@ int rrd_resize( rrd_init(&rrdold); - rrd_file = rrd_open(infilename, &rrdold, RRD_READWRITE | RRD_COPY); + rrd_file = rrd_open(infilename, &rrdold, RRD_READWRITE | RRD_LOCK | RRD_COPY); if (rrd_file == NULL) { rrd_free(&rrdold); return (-1); } - if (rrd_lock(rrd_file) != 0) { - rrd_set_error("could not lock original RRD"); - rrd_free(&rrdold); - rrd_close(rrd_file); - return (-1); - } - - if (target_rra >= rrdold.stat_head->rra_cnt) { rrd_set_error("no such RRA in this RRD"); rrd_free(&rrdold); @@ -111,7 +103,7 @@ int rrd_resize( /* Set this so that the file will be created with the correct size */ rrdnew.rra_def[target_rra].row_cnt += modify; - rrd_out_file = rrd_open(outfilename, &rrdnew, RRD_READWRITE | RRD_CREAT); + rrd_out_file = rrd_open(outfilename, &rrdnew, RRD_READWRITE | RRD_CREAT | RRD_LOCK); if (rrd_out_file == NULL) { rrd_set_error("Can't create '%s': %s", outfilename, rrd_strerror(errno)); @@ -120,14 +112,6 @@ int rrd_resize( rrd_close(rrd_file); return (-1); } - if (rrd_lock(rrd_out_file) != 0) { - rrd_set_error("could not lock new RRD"); - rrd_free(&rrdnew); - rrd_free(&rrdold); - rrd_close(rrd_file); - rrd_close(rrd_out_file); - return (-1); - } /*XXX: do one write for those parts of header that are unchanged */ if ((rrdnew.rra_ptr = (rra_ptr_t *)malloc(sizeof(rra_ptr_t) * rrdold.stat_head->rra_cnt)) == NULL) { rrd_set_error("allocating rra_ptr for new RRD"); diff --git a/src/rrd_tool.h b/src/rrd_tool.h index 4f7dfb37..94942745 100644 --- a/src/rrd_tool.h +++ b/src/rrd_tool.h @@ -133,6 +133,7 @@ typedef int (*rrd_fetch_cb_t)( #define RRD_COPY (1<<4) #define RRD_EXCL (1<<5) #define RRD_READVALUES (1<<6) +#define RRD_LOCK (1<<7) enum cf_en rrd_cf_conv( const char *string); diff --git a/src/rrd_tune.c b/src/rrd_tune.c index 07d45524..2239bcc7 100644 --- a/src/rrd_tune.c +++ b/src/rrd_tune.c @@ -174,7 +174,8 @@ int rrd_tune( } rrd_init(&rrd); - rrd_file = rrd_open(in_filename, &rrd, RRD_READWRITE | RRD_READAHEAD | RRD_READVALUES); + rrd_file = rrd_open(in_filename, &rrd, RRD_READWRITE | RRD_LOCK | + RRD_READAHEAD | RRD_READVALUES); if (rrd_file == NULL) { goto done; } diff --git a/src/rrd_update.c b/src/rrd_update.c index 4f257a18..797f89bb 100644 --- a/src/rrd_update.c +++ b/src/rrd_update.c @@ -366,7 +366,7 @@ static char *rrd_get_file_template(const char *filename) /* {{{ */ /* open file */ rrd_init(&rrd); - rrd_file = rrd_open(filename, &rrd, RRD_READONLY); + rrd_file = rrd_open(filename, &rrd, RRD_READONLY | RRD_LOCK); if (rrd_file == NULL) goto err_free; @@ -858,7 +858,8 @@ static int _rrd_updatex( } rrd_init(&rrd); - if ((rrd_file = rrd_open(filename, &rrd, RRD_READWRITE)) == NULL) { + rrd_file = rrd_open(filename, &rrd, RRD_READWRITE | RRD_LOCK); + if (rrd_file == NULL) { goto err_free; } /* We are now at the beginning of the rra's */ @@ -868,14 +869,6 @@ static int _rrd_updatex( initialize_time(¤t_time, ¤t_time_usec, version); - /* get exclusive lock to whole file. - * lock gets removed when we close the file. - */ - if (rrd_lock(rrd_file) != 0) { - rrd_set_error("could not lock RRD"); - goto err_close; - } - if (allocate_data_structures(&rrd, &updvals, &pdp_temp, tmplt, &tmpl_idx, &tmpl_cnt, &rra_step_cnt, &skip_update,