]> git.ipfire.org Git - thirdparty/rrdtool-1.x.git/blob - src/rrd_open.c
Allow to select locking mechanism (#1207)
[thirdparty/rrdtool-1.x.git] / src / rrd_open.c
1 /*****************************************************************************
2 * RRDtool 1.8.0 Copyright by Tobi Oetiker, 1997-2022
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
8
#ifdef _WIN32
#include <windows.h>
#include <winsdkver.h>  /* Defines _WIN32_MAXVER */
#if _WIN32_MAXVER >= 0x0602 /* _WIN32_WINNT_WIN8 */
#include <synchapi.h>
#endif

#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <limits.h>
#endif                          /* WIN32 */

#include "rrd_tool.h"
#include "compat-cloexec.h"
#include "unused.h"

/* kept after the project headers so their configuration macros are seen first */
#include <stdint.h>     /* uintptr_t */

#ifdef HAVE_BROKEN_MS_ASYNC
#include <sys/types.h>
#include <utime.h>
#endif

#ifdef HAVE_LIBRADOS
#include "rrd_rados.h"
#endif
35
36 #define MEMBLK 8192
37
#ifdef _WIN32
/* Mode values passed to _locking(). The quoted remarks follow the wording
 * of the Microsoft CRT documentation for that function. */
#define _LK_UNLCK 0     /* Unlock */
#define _LK_LOCK 1      /* Lock */
#define _LK_NBLCK 2     /* Non-blocking lock */
#define _LK_RLCK 3      /* "Same as _LK_LOCK" */
#define _LK_NBRLCK 4    /* "Same as _LK_NBLCK" */


/* Aliases without the leading underscore. */
#define LK_UNLCK _LK_UNLCK
#define LK_LOCK _LK_LOCK
#define LK_NBLCK _LK_NBLCK
#define LK_RLCK _LK_RLCK
#define LK_NBRLCK _LK_NBRLCK

/* Variables for CreateFileA(). Names of variables are according to
 * https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-createfilea
 * They are file-scope objects that rrd_open() assigns according to the
 * requested open mode before it calls CreateFileA(). */
DWORD     dwDesiredAccess = 0;
DWORD     dwCreationDisposition = 0;
#endif
57
58 /* DEBUG 2 prints information obtained via mincore(2) */
59 // #define DEBUG 1
60 /* do not calculate exact madvise hints but assume 1 page for headers and
61 * set DONTNEED for the rest, which is assumed to be data */
62 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
63 * your syscalls are very slow */
64
/*
 * Header loading helpers.
 *
 * Every __rrd_read* macro loads `cnt` elements of type `dst_t` into `dst`
 * and advances `offset` by the number of bytes consumed. They are not
 * self-contained: each expansion relies on identifiers that must exist at
 * the point of use (`offset`, the label `out_close`, and depending on the
 * variant `data`, `rrd_file` or `rrd_simple_file`). On failure they record
 * an error with rrd_set_error() and jump to `out_close`.
 */
#ifdef HAVE_MMAP
/* the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
/* mmap variant: no copy is made, `dst` is pointed into the mapping at
 * data + offset after checking that the range lies within file_len. */
#define  __rrd_read_mmap(dst, dst_t, cnt) { \
	size_t     wanted = sizeof(dst_t)*(cnt); \
	if (offset + wanted > rrd_file->file_len) { \
		rrd_set_error("reached EOF while loading header " #dst); \
		goto out_close; \
	} \
	(dst) = (dst_t*)(void*) (data + offset); \
	offset += wanted; \
    }
#else
/* sequential variant: `dst` receives a freshly malloc'ed buffer which is
 * filled with a single read() from the file descriptor. */
#define  __rrd_read_seq(dst, dst_t, cnt) { \
	size_t     wanted = sizeof(dst_t)*(cnt); \
	size_t     got; \
	if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
		rrd_set_error(#dst " malloc"); \
		goto out_close; \
	} \
	got = read (rrd_simple_file->fd, dst, wanted); \
	if (got != wanted) { \
		rrd_set_error("short read while reading header " #dst); \
		goto out_close; \
	} \
	offset += got; \
    }
#endif

#ifdef HAVE_LIBRADOS
/* rados variant: `dst` receives a freshly malloc'ed buffer which is filled
 * by rrd_rados_read() starting at `offset`. */
#define  __rrd_read_rados(dst, dst_t, cnt) { \
	size_t     wanted = sizeof(dst_t)*(cnt); \
	size_t     got; \
	if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
		rrd_set_error(#dst " malloc"); \
		goto out_close; \
	} \
	got = rrd_rados_read(rrd_file->rados, dst, wanted, offset); \
	if (got != wanted) { \
		rrd_set_error("short read while reading header " #dst); \
		goto out_close; \
	} \
	offset += got; \
    }
#endif

/* __rrd_read picks the variant: rados when the file was opened through
 * rados (decided at run time via rrd_file->rados), otherwise mmap when it
 * is compiled in, otherwise plain sequential reads. */
#if defined(HAVE_LIBRADOS) && defined(HAVE_MMAP)
#define __rrd_read(dst, dst_t, cnt) { \
	if (rrd_file->rados) \
	  __rrd_read_rados(dst, dst_t, cnt) \
	else \
	  __rrd_read_mmap(dst, dst_t, cnt) \
    }
#elif defined(HAVE_LIBRADOS) && !defined(HAVE_MMAP)
#define __rrd_read(dst, dst_t, cnt) { \
	if (rrd_file->rados) \
	  __rrd_read_rados(dst, dst_t, cnt) \
	else \
	  __rrd_read_seq(dst, dst_t, cnt) \
    }
#elif defined(HAVE_MMAP)
#define __rrd_read(dst, dst_t, cnt) \
	__rrd_read_mmap(dst, dst_t, cnt)
#else
#define __rrd_read(dst, dst_t, cnt) \
	__rrd_read_seq(dst, dst_t, cnt)
#endif
133
/* get the address of the start of this page
 *
 * Clears the low bits of addr using the mask ~(_page_size-1). The macro
 * expects a variable named _page_size to be in scope where it is used, and
 * the mask only rounds down to a page boundary when that value is a power
 * of two. */
#if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
#ifndef PAGE_START
#define PAGE_START(addr) ((addr)&(~(_page_size-1)))
#endif
#endif
140
/* Forward declarations of file-local helpers defined further down. */

/* Lock the file behind rrd_file; writelock selects a write lock instead of
 * a read lock and lock_mode is one of the RRD_LOCK_* modes.
 * Returns 0 on success. */
static int rrd_rwlock(
    rrd_file_t *rrd_file,
    int writelock,
    int lock_mode);
/* Close fd (on Windows after releasing the _locking() lock).
 * Returns 0 on success, non-zero otherwise. */
static int close_and_unlock(
    int fd);
147
148 /* Open a database file, return its header and an open filehandle,
149 * positioned to the first cdp in the first rra.
150 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
151 * before returning an error. Do not call rrd_close upon failure of rrd_open.
152 * If creating a new file, the parameter rrd must be initialized with
153 * details of the file content.
154 * If opening an existing file, then use rrd must be initialized by
155 * rrd_init(rrd) prior to invoking rrd_open
156 */
157
158 rrd_file_t *rrd_open(
159 const char *const file_name,
160 rrd_t *rrd,
161 unsigned rdwr)
162 {
163 unsigned long ui;
164 int flags = 0;
165 int version;
166
167 #ifdef HAVE_MMAP
168 char *data = MAP_FAILED;
169 #endif
170 off_t offset = 0;
171 struct stat statb;
172 rrd_file_t *rrd_file = NULL;
173 rrd_simple_file_t *rrd_simple_file = NULL;
174 size_t newfile_size = 0;
175
176 if ((rdwr & RRD_LOCK_MASK) == RRD_LOCK_DEFAULT) {
177 rdwr &= ~RRD_LOCK_MASK;
178 rdwr |= _rrd_lock_flags(_rrd_lock_default());
179 }
180
181 /* Are we creating a new file? */
182 if (rdwr & RRD_CREAT) {
183 size_t header_len, value_cnt, data_len;
184
185 header_len = rrd_get_header_size(rrd);
186
187 value_cnt = 0;
188 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
189 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[ui].row_cnt;
190
191 data_len = sizeof(rrd_value_t) * value_cnt;
192
193 newfile_size = header_len + data_len;
194 }
195
196 rrd_file = (rrd_file_t *) malloc(sizeof(rrd_file_t));
197 if (rrd_file == NULL) {
198 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
199 return NULL;
200 }
201 memset(rrd_file, 0, sizeof(rrd_file_t));
202 rrd_file->rrd = rrd;
203
204 rrd_file->pvt = malloc(sizeof(rrd_simple_file_t));
205 if (rrd_file->pvt == NULL) {
206 rrd_set_error("allocating rrd_simple_file for '%s'", file_name);
207 free(rrd_file);
208 return NULL;
209 }
210 memset(rrd_file->pvt, 0, sizeof(rrd_simple_file_t));
211 rrd_simple_file = (rrd_simple_file_t *) rrd_file->pvt;
212 rrd_simple_file->fd = -1;
213
214 #ifdef DEBUG
215 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
216 (RRD_READONLY | RRD_READWRITE)) {
217 /* Both READONLY and READWRITE were given, which is invalid. */
218 rrd_set_error("in read/write request mask");
219 free(rrd_file);
220 return NULL;
221 }
222 #endif
223
224 #ifdef HAVE_LIBRADOS
225 if (strncmp("ceph//", file_name, 6) == 0) {
226 rrd_file->rados = rrd_rados_open(file_name + 6);
227 if (rrd_file->rados == NULL)
228 goto out_free;
229
230 if (rdwr & RRD_LOCK) {
231 /* Note: rados read lock is not implemented. See rrd_lock(). */
232 if (rrd_rwlock(rrd_file, rdwr & RRD_READWRITE) != 0) {
233 rrd_set_error("could not lock RRD");
234 goto out_close;
235 }
236 }
237
238 if (rdwr & RRD_CREAT)
239 goto out_done;
240
241 goto read_check;
242 }
243 #endif
244
245 #ifdef HAVE_MMAP
246 rrd_simple_file->mm_prot = PROT_READ;
247 rrd_simple_file->mm_flags = 0;
248 #endif
249
250 if (rdwr & RRD_READONLY) {
251 flags |= O_RDONLY;
252 #ifdef _WIN32
253 dwDesiredAccess = GENERIC_READ;
254 dwCreationDisposition = OPEN_EXISTING;
255 #endif
256 #ifdef HAVE_MMAP
257 # if !defined(AIX)
258 rrd_simple_file->mm_flags = MAP_PRIVATE;
259 # endif
260 # ifdef MAP_NORESERVE
261 rrd_simple_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
262 # endif
263 #endif
264 } else {
265 if (rdwr & RRD_READWRITE) {
266 flags |= O_RDWR;
267 #ifdef _WIN32
268 dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
269 dwCreationDisposition = OPEN_EXISTING;
270 #endif
271 #ifdef HAVE_MMAP
272 rrd_simple_file->mm_flags = MAP_SHARED;
273 rrd_simple_file->mm_prot |= PROT_WRITE;
274 #endif
275 }
276 if (rdwr & RRD_CREAT) {
277 flags |= (O_CREAT | O_TRUNC);
278 #ifdef _WIN32
279 dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
280 dwCreationDisposition = CREATE_ALWAYS;
281 #endif
282 }
283 if (rdwr & RRD_EXCL) {
284 flags |= O_EXCL;
285 #ifdef _WIN32
286 dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
287 dwCreationDisposition = CREATE_NEW;
288 #endif
289 }
290 }
291 if (rdwr & RRD_READAHEAD) {
292 #ifdef MAP_POPULATE
293 rrd_simple_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
294 #endif
295 #if defined MAP_NONBLOCK
296 rrd_simple_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
297 #endif
298 }
299 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
300 flags |= O_BINARY;
301 #endif
302
303 #if defined(_WIN32)
304 /* In Windows we need FILE_SHARE_DELETE, so that the file can be
305 * renamed/replaced later on in rrd_create.c
306 * This is only possible using CreateFileA() first and not using open() alone */
307 HANDLE handle;
308
309 handle =
310 CreateFileA(file_name, dwDesiredAccess,
311 FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
312 NULL, dwCreationDisposition, FILE_ATTRIBUTE_NORMAL, NULL);
313 if (handle == INVALID_HANDLE_VALUE) {
314 LPVOID lpMsgBuf = NULL;
315
316 FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
317 FORMAT_MESSAGE_FROM_SYSTEM |
318 FORMAT_MESSAGE_IGNORE_INSERTS, NULL, GetLastError(), 0,
319 (LPTSTR) & lpMsgBuf, 0, NULL);
320 rrd_set_error("opening '%s': %s", file_name, (LPTSTR) lpMsgBuf);
321 LocalFree(lpMsgBuf);
322 goto out_free;
323 }
324 if ((rrd_simple_file->fd = _open_osfhandle((intptr_t) handle, flags)) < 0) {
325 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
326 goto out_free;
327 }
328 #else
329 if ((rrd_simple_file->fd = open(file_name, flags | O_CLOEXEC, 0666)) < 0) {
330 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
331 goto out_free;
332 }
333 #endif
334
335 #ifdef HAVE_MMAP
336 #ifdef HAVE_BROKEN_MS_ASYNC
337 if (rdwr & RRD_READWRITE) {
338 /* some unices, the files mtime does not get updated
339 on memory mapped files, in order to help them,
340 we update the timestamp at this point.
341 The thing happens pretty 'close' to the open
342 call so the chances of a race should be minimal.
343
344 Maybe ask your vendor to fix your OS ... */
345 utime(file_name, NULL);
346 }
347 #endif
348 #endif
349
350 if (rrd_rwlock(rrd_file, rdwr & RRD_READWRITE, rdwr & RRD_LOCK_MASK) != 0) {
351 rrd_set_error("could not lock RRD");
352 goto out_close;
353 }
354
355 /* Better try to avoid seeks as much as possible. stat may be heavy but
356 * many concurrent seeks are even worse. */
357 if (newfile_size == 0 && ((fstat(rrd_simple_file->fd, &statb)) < 0)) {
358 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
359 goto out_close;
360 }
361 if (newfile_size == 0) {
362 rrd_file->file_len = statb.st_size;
363 } else {
364 rrd_file->file_len = newfile_size;
365 #ifdef HAVE_POSIX_FALLOCATE
366 /* man: posix_fallocate() returns zero on success,
367 * or an error number on failure. Note that errno is not set.
368 */
369 int fret =
370 posix_fallocate(rrd_simple_file->fd, 0, newfile_size);
371 /* ZFS (on FreeBSD) does not support posix_fallocate(), always returning
372 * EINVAL. Ignore this error and continue anyway.
373 * Without this, resize isn't possible on ZFS filesystems.
374 */
375 if (fret == EINVAL) {
376 /* DO NOTHING */
377 } else if (fret) {
378 rrd_set_error("posix_fallocate '%s': %s", file_name,
379 rrd_strerror(fret));
380 goto out_close;
381 } else {
382 goto no_lseek_necessary;
383 }
384 #endif
385 if (lseek(rrd_simple_file->fd, newfile_size - 1, SEEK_SET) == -1) {
386 rrd_set_error("lseek '%s': %s", file_name, rrd_strerror(errno));
387 goto out_close;
388 }
389 if (write(rrd_simple_file->fd, "\0", 1) == -1) { /* poke */
390 rrd_set_error("write '%s': %s", file_name, rrd_strerror(errno));
391 goto out_close;
392 }
393 if (lseek(rrd_simple_file->fd, 0, SEEK_SET) == -1) {
394 rrd_set_error("lseek '%s': %s", file_name, rrd_strerror(errno));
395 goto out_close;
396 }
397 }
398 #ifdef HAVE_POSIX_FALLOCATE
399 no_lseek_necessary:
400 #endif
401
402 #ifdef HAVE_MMAP
403 #ifndef HAVE_POSIX_FALLOCATE
404 /* force allocating the file on the underlying filesystem to prevent any
405 * future bus error when the filesystem is full and attempting to write
406 * trough the file mapping. Filling the file using memset on the file
407 * mapping can also lead some bus error, so we use the old fashioned
408 * write().
409 */
410 if (rdwr & RRD_CREAT) {
411 char buf[4096];
412 unsigned i;
413
414 memset(buf, DNAN, sizeof buf);
415 lseek(rrd_simple_file->fd, offset, SEEK_SET);
416
417 for (i = 0; i < (newfile_size - 1) / sizeof buf; ++i) {
418 if (write(rrd_simple_file->fd, buf, sizeof buf) == -1) {
419 rrd_set_error("write '%s': %s", file_name,
420 rrd_strerror(errno));
421 goto out_close;
422 }
423 }
424
425 if (write(rrd_simple_file->fd, buf,
426 (newfile_size - 1) % sizeof buf) == -1) {
427 rrd_set_error("write '%s': %s", file_name, rrd_strerror(errno));
428 goto out_close;
429 }
430
431 lseek(rrd_simple_file->fd, 0, SEEK_SET);
432 }
433 #endif
434
435 data = mmap(0, rrd_file->file_len,
436 rrd_simple_file->mm_prot, rrd_simple_file->mm_flags,
437 rrd_simple_file->fd, offset);
438
439 /* lets see if the first read worked */
440 if (data == MAP_FAILED) {
441 rrd_set_error("mmaping file '%s': %s", file_name,
442 rrd_strerror(errno));
443 goto out_close;
444 }
445 rrd->__mmap_start = data;
446 rrd->__mmap_size = rrd_file->file_len;
447
448 rrd_simple_file->file_start = data;
449 #endif
450 if (rdwr & RRD_CREAT)
451 goto out_done;
452
453 if (rdwr & RRD_READAHEAD) {
454 /* If perfect READAHEAD is not achieved for whatever reason, caller
455 will not thank us for advising the kernel of RANDOM access below. */
456 rdwr |= RRD_COPY;
457 }
458 /* In general we need no read-ahead when dealing with rrd_files.
459 When we stop reading, it is highly unlikely that we start up again.
460 In this manner we actually save time and disk access (and buffer cache).
461 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
462 #ifdef USE_MADVISE
463 if (rdwr & RRD_COPY) {
464 /* We will read everything in a moment (copying) */
465 madvise(data, rrd_file->file_len, MADV_SEQUENTIAL);
466 } else {
467 /* We do not need to read anything in for the moment */
468 madvise(data, rrd_file->file_len, MADV_RANDOM);
469 }
470 #endif
471 #if !defined(HAVE_MMAP) && defined(HAVE_POSIX_FADVISE)
472 if (rdwr & RRD_COPY) {
473 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
474 } else {
475 posix_fadvise(rrd_simple_file->fd, 0, 0, POSIX_FADV_RANDOM);
476 }
477 #endif
478
479 #ifdef HAVE_LIBRADOS
480 read_check:
481 #endif
482
483 __rrd_read(rrd->stat_head, stat_head_t,
484 1);
485
486 /* lets do some test if we are on track ... */
487 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
488 rrd_set_error("'%s' is not an RRD file", file_name);
489 goto out_close;
490 }
491
492 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
493 rrd_set_error("This RRD was created on another architecture");
494 goto out_close;
495 }
496
497 version = atoi(rrd->stat_head->version);
498
499 if (version > atoi(RRD_VERSION5)) {
500 rrd_set_error("can't handle RRD file version %s",
501 rrd->stat_head->version);
502 goto out_close;
503 }
504 __rrd_read(rrd->ds_def, ds_def_t,
505 rrd->stat_head->ds_cnt);
506
507 __rrd_read(rrd->rra_def, rra_def_t,
508 rrd->stat_head->rra_cnt);
509
510 /* handle different format for the live_head */
511 if (version < 3) {
512 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
513 if (rrd->live_head == NULL) {
514 rrd_set_error("live_head_t malloc");
515 goto out_close;
516 }
517 __rrd_read(rrd->legacy_last_up, time_t,
518 1);
519
520 rrd->live_head->last_up = *rrd->legacy_last_up;
521 rrd->live_head->last_up_usec = 0;
522 } else {
523 __rrd_read(rrd->live_head, live_head_t,
524 1);
525 }
526 __rrd_read(rrd->pdp_prep, pdp_prep_t,
527 rrd->stat_head->ds_cnt);
528 __rrd_read(rrd->cdp_prep, cdp_prep_t,
529 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
530 __rrd_read(rrd->rra_ptr, rra_ptr_t,
531 rrd->stat_head->rra_cnt);
532
533 rrd_file->header_len = offset;
534 rrd_file->pos = offset;
535
536 #if defined(HAVE_MMAP) && defined(USE_MADVISE)
537 if (data != MAP_FAILED) {
538 /* MADV_SEQUENTIAL mentions drop-behind. Override it for the header
539 * now we've read it, in case anyone implemented drop-behind.
540 *
541 * Do *not* fall back to fadvise() for !HAVE_MMAP. In that case,
542 * we've copied the header and will not read it again. Doing e.g.
543 * FADV_NORMAL on Linux (4.12) on *any* region would negate the
544 * effect of previous FADV_SEQUENTIAL.
545 */
546 madvise(data, sysconf(_SC_PAGESIZE), MADV_NORMAL);
547 madvise(data, sysconf(_SC_PAGESIZE), MADV_WILLNEED);
548 }
549 #endif
550
551 {
552 unsigned long row_cnt = 0;
553
554 for (ui = 0; ui < rrd->stat_head->rra_cnt; ui++)
555 row_cnt += rrd->rra_def[ui].row_cnt;
556
557 size_t correct_len = rrd_file->header_len +
558 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
559
560 #ifdef HAVE_LIBRADOS
561 /* skip length checking for rados file */
562 if (rrd_file->rados) {
563 rrd_file->file_len = correct_len;
564 }
565 #endif
566
567 if (correct_len > rrd_file->file_len) {
568 rrd_set_error("'%s' is too small (should be %ld bytes)",
569 file_name, (long long) correct_len);
570 goto out_close;
571 }
572 if (rdwr & RRD_READVALUES) {
573 __rrd_read(rrd->rrd_value, rrd_value_t,
574 row_cnt * rrd->stat_head->ds_cnt);
575
576 if (rrd_seek(rrd_file, rrd_file->header_len, SEEK_SET) != 0)
577 goto out_close;
578 }
579 }
580
581 out_done:
582 return (rrd_file);
583
584 out_close:
585 #ifdef HAVE_MMAP
586 if (data != MAP_FAILED)
587 munmap(data, rrd_file->file_len);
588 #endif
589 #ifdef HAVE_LIBRADOS
590 if (rrd_file->rados)
591 rrd_rados_close(rrd_file->rados);
592 #endif
593 if (rrd_simple_file->fd >= 0) {
594 /* keep the original error */
595 char *e = strdup(rrd_get_error());
596
597 close_and_unlock(rrd_simple_file->fd);
598
599 if (e) {
600 rrd_set_error(e);
601 free(e);
602 } else
603 rrd_set_error("error message was lost (out of memory)");
604 }
605 out_free:
606 free(rrd_file->pvt);
607 free(rrd_file);
608 return NULL;
609 }
610
611
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * Debug helper: queries mincore() for the mapping of rrd_file and writes
 * one line to stderr for every run of pages sharing the same residency
 * state. Each line is prefixed with mark. Without HAVE_MMAP it only prints
 * a notice. The reported length is a number of pages. */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
    rrd_simple_file_t *rrd_simple_file;

    rrd_simple_file = (rrd_simple_file_t *) rrd_file->pvt;
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    size_t    off;
    size_t    prev;
    size_t    page_cnt;
    unsigned char *vec;
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);

    /* mincore() fills one status byte per page of the queried range */
    page_cnt = (rrd_file->file_len + _page_size - 1) / _page_size;
    vec = malloc(page_cnt);
    if (vec != NULL) {
        memset(vec, 0, page_cnt);
        if (mincore(rrd_simple_file->file_start, rrd_file->file_len, vec) ==
            0) {
            unsigned  is_in = 0, was_in = 0;

            /* only the first page_cnt entries carry information */
            for (off = 0, prev = 0; off < page_cnt; ++off) {
                is_in = vec[off] & 1;   /* if lsb set then is core resident */
                if (off == 0)
                    was_in = is_in;
                if (was_in != is_in) {
                    fprintf(stderr, "%s: %sin core: %p len %lu\n", mark,
                            was_in ? "" : "not ", (void *) (vec + prev),
                            (unsigned long) (off - prev));
                    was_in = is_in;
                    prev = off;
                }
            }
            fprintf(stderr,
                    "%s: %sin core: %p len %lu\n", mark,
                    was_in ? "" : "not ", (void *) (vec + prev),
                    (unsigned long) (off - prev));
        } else
            fprintf(stderr, "mincore: %s", rrd_strerror(errno));
        /* the status vector is only needed for this report */
        free(vec);
    }
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
660
661 /*
662 * get exclusive lock to whole file.
663 * lock gets removed when we close the file
664 *
665 * returns 0 on success
666 */
667 int rrd_lock(
668 rrd_file_t *rrd_file)
669 {
670 return rrd_rwlock(rrd_file, 1, RRD_LOCK_DEFAULT);
671 }
672
#if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
#define USE_WINDOWS_LOCK 1
#endif

#ifdef USE_WINDOWS_LOCK
/*
 * Lock the file behind fd with _locking(), retrying until the lock is
 * granted or a failure other than EACCES is reported.
 *
 * _locking() works relative to the descriptor position, so the position is
 * moved to offset 0 for the call (keeping the locked range identical to the
 * one released on close) and restored afterwards.
 *
 * No rrd error message is set here; the caller only learns that locking
 * failed.
 *
 * Returns 0 on success, non-zero on failure.
 */
static
int rrd_windows_lock(
    int fd)
{
    int       lock_rc;
    long      saved_pos = tell(fd);

    if (saved_pos < 0)
        return -1;

    if (lseek(fd, 0, SEEK_SET) < 0)
        return -1;

    for (;;) {
        lock_rc = _locking(fd, _LK_NBLCK, LONG_MAX);

        /* stop on success, and on every failure except "held by someone
         * else" (EACCES) */
        if (lock_rc == 0 || errno != EACCES)
            break;

        /*
         * Wait 0.01 seconds before the next attempt.  _LK_LOCK would retry
         * on its own, but only once per second.
         */
        Sleep(10);
    }

    /* put the descriptor back where the caller left it */
    if (lseek(fd, saved_pos, SEEK_SET) < 0)
        return -1;

    return lock_rc;
}
#endif
725
/*
 * Close fd. On builds using the Windows lock, the lock taken with
 * _locking() is released first.
 *
 * Returns 0 when everything succeeded; a non-zero value otherwise, in which
 * case an rrd error message has been set.
 */
static
int close_and_unlock(
    int fd)
{
    int       status = 0;

#ifdef USE_WINDOWS_LOCK
    /*
     * "If a process closes a file that has outstanding locks, the locks are
     * unlocked by the operating system. However, the time it takes for the
     * operating system to unlock these locks depends upon available system
     * resources. Therefore, it is recommended that your process explicitly
     * unlock all files it has locked when it terminates." (?!)
     */

    if (lseek(fd, 0, SEEK_SET) < 0) {
        /* cannot reach the locked range; still close the descriptor below */
        rrd_set_error("lseek: %s", rrd_strerror(errno));
        status = -1;
    } else {
        status = _locking(fd, LK_UNLCK, LONG_MAX);
        if (status != 0) {
            if (errno == EACCES) {
                /* fd was not locked - entirely possible, not an error */
                status = 0;
            } else {
                rrd_set_error("unlock file: %s", rrd_strerror(errno));
            }
        }
    }
#endif

    if (close(fd) != 0) {
        status = -1;
        rrd_set_error("closing file: %s", rrd_strerror(errno));
    }

    return status;
}
764
765 static
766 int rrd_rwlock(
767 rrd_file_t *rrd_file,
768 int writelock,
769 int lock_mode)
770 {
771 if (lock_mode == RRD_LOCK_NONE)
772 return 0;
773
774 #ifdef DISABLE_FLOCK
775 (void) rrd_file;
776 return 0;
777 #else
778 #ifdef HAVE_LIBRADOS
779 if (rrd_file->rados) {
780 /*
781 * No read lock on rados. It would be complicated by the
782 * use of a short finite lock duration in rrd_rados_lock().
783 * Also rados does not provide blocking locks.
784 *
785 * Rados users may use snapshots if they need to
786 * e.g. obtain a consistent backup.
787 */
788 if (writelock)
789 return rrd_rados_lock(rrd_file->rados);
790 else
791 return 0;
792 }
793 #endif
794 int rcstat;
795 rrd_simple_file_t *rrd_simple_file;
796
797 rrd_simple_file = (rrd_simple_file_t *) rrd_file->pvt;
798 #ifdef USE_WINDOWS_LOCK
799 /* _locking() does not support read locks; we always take a write lock */
800 rcstat = rrd_windows_lock(rrd_simple_file->fd);
801 /* Silence unused parameter compiler warning */
802 (void) writelock;
803 #else
804 {
805 struct flock lock;
806 int op = lock_mode == RRD_LOCK_TRY ? F_SETLK : F_SETLKW;
807
808 lock.l_type = writelock ? F_WRLCK : /* exclusive write lock or */
809 F_RDLCK; /* shared read lock */
810 lock.l_len = 0; /* whole file */
811 lock.l_start = 0; /* start of file */
812 lock.l_whence = SEEK_SET; /* end of file */
813
814 rcstat = fcntl(rrd_simple_file->fd, op, &lock);
815 }
816 #endif
817
818 return (rcstat);
819 #endif
820 }
821
822
823 /* drop cache except for the header and the active pages */
824 void rrd_dontneed(
825 rrd_file_t *rrd_file,
826 rrd_t *rrd)
827 {
828 rrd_simple_file_t *rrd_simple_file;
829
830 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
831 size_t dontneed_start;
832 size_t rra_start;
833 size_t active_block;
834 size_t i;
835 ssize_t _page_size = sysconf(_SC_PAGESIZE);
836
837 if (rrd_file == NULL) {
838 #if defined DEBUG && DEBUG
839 fprintf(stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
840 #endif
841 return;
842 }
843 rrd_simple_file = (rrd_simple_file_t *) rrd_file->pvt;
844
845 #if defined DEBUG && DEBUG > 1
846 mincore_print(rrd_file, "before");
847 #endif
848
849 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
850 rra_start = rrd_file->header_len;
851 dontneed_start = PAGE_START(rra_start) + _page_size;
852 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
853 active_block =
854 PAGE_START(rra_start
855 + rrd->rra_ptr[i].cur_row
856 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
857 if (active_block > dontneed_start) {
858 #ifdef USE_MADVISE
859 madvise(rrd_simple_file->file_start + dontneed_start,
860 active_block - dontneed_start - 1, MADV_DONTNEED);
861 #else
862 #ifdef HAVE_POSIX_FADVISE
863 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
864 posix_fadvise(rrd_simple_file->fd, dontneed_start,
865 active_block - dontneed_start - 1,
866 POSIX_FADV_DONTNEED);
867 #endif
868 #endif
869 }
870 dontneed_start = active_block;
871 /* do not release 'hot' block if update for this RAA will occur
872 * within 10 minutes */
873 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
874 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
875 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
876 dontneed_start += _page_size;
877 }
878 rra_start +=
879 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
880 sizeof(rrd_value_t);
881 }
882
883 if (dontneed_start < rrd_file->file_len) {
884 #ifdef USE_MADVISE
885 madvise(rrd_simple_file->file_start + dontneed_start,
886 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
887 #else
888 #ifdef HAVE_POSIX_FADVISE
889 posix_fadvise(rrd_simple_file->fd, dontneed_start,
890 rrd_file->file_len - dontneed_start,
891 POSIX_FADV_DONTNEED);
892 #endif
893 #endif
894 }
895
896 #if defined DEBUG && DEBUG > 1
897 mincore_print(rrd_file, "after");
898 #endif
899 #else /* #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE */
900 /* Silence compiler warnings about unused variables and parameters */
901 (void) rrd_simple_file;
902 (void) rrd_file;
903 (void) rrd;
904 #endif /* without madvise and posix_fadvise it does not make much sense todo anything */
905 }
906
907
908
909
910
911 int rrd_close(
912 rrd_file_t *rrd_file)
913 {
914 rrd_simple_file_t *rrd_simple_file;
915
916 rrd_simple_file = (rrd_simple_file_t *) rrd_file->pvt;
917 int ret = 0;
918
919 #ifdef HAVE_LIBRADOS
920 if (rrd_file->rados) {
921 if (rrd_rados_close(rrd_file->rados) != 0)
922 ret = -1;
923 }
924 #endif
925 #ifdef HAVE_MMAP
926 if (rrd_simple_file->file_start != NULL) {
927 if (munmap(rrd_simple_file->file_start, rrd_file->file_len) != 0) {
928 ret = -1;
929 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
930 }
931 }
932 #endif
933 if (rrd_simple_file->fd >= 0) {
934 if (close_and_unlock(rrd_simple_file->fd) != 0)
935 ret = -1;
936 }
937 free(rrd_file->pvt);
938 free(rrd_file);
939 return ret;
940 }
941
942
943 /* Set position of rrd_file. */
944
945 off_t rrd_seek(
946 rrd_file_t *rrd_file,
947 off_t off,
948 int whence)
949 {
950 #ifdef HAVE_LIBRADOS
951 /* no seek for rados */
952 if (rrd_file->rados) {
953 rrd_file->pos = off;
954 return 0;
955 }
956 #endif
957
958 off_t ret = 0;
959
960 #ifndef HAVE_MMAP
961 rrd_simple_file_t *rrd_simple_file;
962
963 rrd_simple_file = (rrd_simple_file_t *) rrd_file->pvt;
964 #endif
965
966 #ifdef HAVE_MMAP
967 if (whence == SEEK_SET)
968 rrd_file->pos = off;
969 else if (whence == SEEK_CUR)
970 rrd_file->pos += off;
971 else if (whence == SEEK_END)
972 rrd_file->pos = rrd_file->file_len + off;
973 #else
974 ret = lseek(rrd_simple_file->fd, off, whence);
975 if (ret < 0)
976 rrd_set_error("lseek: %s", rrd_strerror(errno));
977 rrd_file->pos = ret;
978 #endif
979 /* mimic fseek, which returns 0 upon success */
980 return ret < 0; /*XXX: or just ret to mimic lseek */
981 }
982
983
984 /* Get current position in rrd_file. */
985
986 off_t rrd_tell(
987 rrd_file_t *rrd_file)
988 {
989 return rrd_file->pos;
990 }
991
992
993 /* Read count bytes into buffer buf, starting at rrd_file->pos.
994 * Returns the number of bytes read or <0 on error. */
995
996 ssize_t rrd_read(
997 rrd_file_t *rrd_file,
998 void *buf,
999 size_t count)
1000 {
1001 #ifdef HAVE_LIBRADOS
1002 if (rrd_file->rados) {
1003 ssize_t ret =
1004 rrd_rados_read(rrd_file->rados, buf, count, rrd_file->pos);
1005 if (ret > 0)
1006 rrd_file->pos += ret;
1007 return ret;
1008 }
1009 #endif
1010 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *) rrd_file->pvt;
1011
1012 #ifdef HAVE_MMAP
1013 size_t _cnt = count;
1014 ssize_t _surplus;
1015
1016 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
1017 return 0;
1018 if (buf == NULL)
1019 return -1; /* EINVAL */
1020 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
1021 if (_surplus > 0) { /* short read */
1022 _cnt -= _surplus;
1023 }
1024 if (_cnt == 0)
1025 return 0; /* EOF */
1026 buf = memcpy(buf, rrd_simple_file->file_start + rrd_file->pos, _cnt);
1027
1028 rrd_file->pos += _cnt; /* mimic read() semantics */
1029 return _cnt;
1030 #else
1031 ssize_t ret;
1032
1033 ret = read(rrd_simple_file->fd, buf, count);
1034 if (ret > 0)
1035 rrd_file->pos += ret; /* mimic read() semantics */
1036 return ret;
1037 #endif
1038 }
1039
1040
1041 /* Write count bytes from buffer buf to the current position
1042 * rrd_file->pos of rrd_simple_file->fd.
1043 * Returns the number of bytes written or <0 on error. */
1044
1045 ssize_t rrd_write(
1046 rrd_file_t *rrd_file,
1047 const void *buf,
1048 size_t count)
1049 {
1050 #ifdef HAVE_LIBRADOS
1051 if (rrd_file->rados) {
1052 size_t ret =
1053 rrd_rados_write(rrd_file->rados, buf, count, rrd_file->pos);
1054 if (ret > 0)
1055 rrd_file->pos += count;
1056 return ret;
1057 }
1058 #endif
1059 rrd_simple_file_t *rrd_simple_file = (rrd_simple_file_t *) rrd_file->pvt;
1060
1061 #ifdef HAVE_MMAP
1062 size_t old_size = rrd_file->file_len;
1063
1064 if (count == 0)
1065 return 0;
1066 if (buf == NULL)
1067 return -1; /* EINVAL */
1068
1069 if ((rrd_file->pos + count) > old_size) {
1070 rrd_set_error
1071 ("attempting to write beyond end of file (%ld + %ld > %ld)",
1072 rrd_file->pos, count, old_size);
1073 return -1;
1074 }
1075 /* can't use memcpy since the areas overlap when tuning */
1076 memmove(rrd_simple_file->file_start + rrd_file->pos, buf, count);
1077 rrd_file->pos += count;
1078 return count; /* mimic write() semantics */
1079 #else
1080 ssize_t _sz = write(rrd_simple_file->fd, buf, count);
1081
1082 if (_sz > 0)
1083 rrd_file->pos += _sz;
1084 return _sz;
1085 #endif
1086 }
1087
1088
/* this is a leftover from the old days, it serves no purpose
   and is therefore turned into a no-op.
   The rrd_file argument is ignored. */
void rrd_flush(
    rrd_file_t UNUSED(*rrd_file))
{
}
1095
1096 /* Initialize RRD header. */
1097
1098 void rrd_init(
1099 rrd_t *rrd)
1100 {
1101 rrd->stat_head = NULL;
1102 rrd->ds_def = NULL;
1103 rrd->rra_def = NULL;
1104 rrd->live_head = NULL;
1105 rrd->legacy_last_up = NULL;
1106 rrd->rra_ptr = NULL;
1107 rrd->pdp_prep = NULL;
1108 rrd->cdp_prep = NULL;
1109 rrd->rrd_value = NULL;
1110 rrd->__mmap_start = NULL;
1111 rrd->__mmap_size = 0;
1112 }
1113
1114
1115 /* free RRD data, act correctly, regardless of mmap'ped or malloc'd memory. */
1116 static void free_rrd_ptr_if_not_mmapped(
1117 void *m,
1118 const rrd_t *rrd)
1119 {
1120 if (m == NULL)
1121 return;
1122
1123 if (rrd == NULL || rrd->__mmap_start == NULL) {
1124 free(m);
1125 return;
1126 }
1127
1128 /* is this ALWAYS correct on all supported platforms ??? */
1129 long ofs = (char *) m - (char *) rrd->__mmap_start;
1130
1131 if (ofs < rrd->__mmap_size) {
1132 // DO NOT FREE, this memory is mmapped!!
1133 return;
1134 }
1135
1136 free(m);
1137 }
1138
1139 void rrd_free(
1140 rrd_t *rrd)
1141 {
1142 if (rrd == NULL)
1143 return;
1144
1145 free_rrd_ptr_if_not_mmapped(rrd->live_head, rrd);
1146 rrd->live_head = NULL;
1147 free_rrd_ptr_if_not_mmapped(rrd->stat_head, rrd);
1148 rrd->stat_head = NULL;
1149 free_rrd_ptr_if_not_mmapped(rrd->ds_def, rrd);
1150 rrd->ds_def = NULL;
1151 free_rrd_ptr_if_not_mmapped(rrd->rra_def, rrd);
1152 rrd->rra_def = NULL;
1153 free_rrd_ptr_if_not_mmapped(rrd->rra_ptr, rrd);
1154 rrd->rra_ptr = NULL;
1155 free_rrd_ptr_if_not_mmapped(rrd->pdp_prep, rrd);
1156 rrd->pdp_prep = NULL;
1157 free_rrd_ptr_if_not_mmapped(rrd->cdp_prep, rrd);
1158 rrd->cdp_prep = NULL;
1159 free_rrd_ptr_if_not_mmapped(rrd->rrd_value, rrd);
1160 rrd->rrd_value = NULL;
1161 }
1162
/*
 * Free memory that was allocated by the rrd library.
 *
 * Intended for external users of the library, which hand the pointer
 * back here rather than releasing it themselves.  The pointer is passed
 * straight to free(), so NULL is accepted.
 */
void rrd_freemem(void *mem)
{
    free(mem);
}
1171
1172 /*
1173 * rra_update informs us about the RRAs being updated
1174 * The low level storage API may use this information for
1175 * aligning RRAs within stripes, or other performance enhancements
1176 */
1177 void rrd_notify_row(
1178 rrd_file_t UNUSED(*rrd_file),
1179 int UNUSED(rra_idx),
1180 unsigned long UNUSED(rra_row),
1181 time_t UNUSED(rra_time))
1182 {
1183 }
1184
1185 /*
1186 * This function is called when creating a new RRD
1187 * The storage implementation can use this opportunity to select
1188 * a sensible starting row within the file.
1189 * The default implementation is random, to ensure that all RRAs
1190 * don't change to a new disk block at the same time
1191 */
1192 unsigned long rrd_select_initial_row(
1193 rrd_file_t UNUSED(*rrd_file),
1194 int UNUSED(rra_idx),
1195 rra_def_t *rra)
1196 {
1197 return rrd_random() % rra->row_cnt;
1198 }
1199
1200 /*
1201 * Translates a string in a RRD_FLAGS_LOCKING_xxx constant.
1202 *
1203 * Empty or non-existing strings are valid and will be mapped to a default
1204 * value.
1205 *
1206 * Functions returns -1 on unsupported values but does not emit diagnostics.
1207 */
1208 static int _rrd_lock_parse(const char *opt)
1209 {
1210 /* non-existing and empty values */
1211 if (!opt || !opt[0])
1212 /* the default locking mode */
1213 return RRD_FLAGS_LOCKING_MODE_TRY;
1214 else if (strcmp(opt, "try") == 0)
1215 return RRD_FLAGS_LOCKING_MODE_TRY;
1216 else if (strcmp(opt, "block") == 0)
1217 return RRD_FLAGS_LOCKING_MODE_BLOCK;
1218 else if (strcmp(opt, "none") == 0)
1219 return RRD_FLAGS_LOCKING_MODE_NONE;
1220 else
1221 return -1;
1222 }
1223
1224 /*
1225 * Returns the default locking method.
1226 *
1227 * It reads the $RRD_LOCKING environment.
1228 *
1229 * Function always succeeds; unsupported values will emit a
1230 * diagnostic and function returns a default value in this case.
1231 */
1232 int _rrd_lock_default(void)
1233 {
1234 const char *opt = getenv("RRD_LOCKING");
1235 int flags = _rrd_lock_parse(opt);
1236
1237 if (flags < 0) {
1238 fprintf(stderr,
1239 "unsupported locking mode '%s' in $RRD_LOCKING; assuming 'try'\n",
1240 opt);
1241 return RRD_FLAGS_LOCKING_MODE_TRY;
1242 }
1243
1244 return flags;
1245 }
1246
1247 /*
1248 * Translates a string to a RRD_FLAGS_LOCKING_xxx constant and updates flags.
1249 *
1250 * Function will fail on unsupported values and return -1. It sets rrd_set_error()
1251 * in this case.
1252 *
1253 * Else, the RRD_FLAGS_LOCKING_xxx related bits in 'out_flags' will be cleared
1254 * and updated. Function returns 0 then.
1255 */
1256 int _rrd_lock_from_opt(int *out_flags, const char *opt)
1257 {
1258 int flags = _rrd_lock_parse(opt);
1259
1260 if (flags < 0) {
1261 rrd_set_error("unsupported locking mode '%s'\n", opt);
1262 return flags;
1263 }
1264
1265 *out_flags &= ~RRD_FLAGS_LOCKING_MODE_MASK;
1266 *out_flags |= flags;
1267
1268 return 0;
1269 }
1270
1271 /*
1272 * Translates RRD_FLAGS_LOCKING_MODE_xxx to RRD_LOCK_xxx
1273 *
1274 * Function removes unrelated bits from 'extra_flags' and maps it to the
1275 * RRD_LOCK_xxx constants.
1276 */
1277 int _rrd_lock_flags(int extra_flags)
1278 {
1279 /* Due to legacy reasons, we have to map this manually.
1280 *
1281 * E.g. RRD_LOCK_DEFAULT (which might be used by deprecated direct calls
1282 * to rrd_open()) must be non-zero. But RRD_FLAGS_LOCKING_MODE_DEFAULT
1283 * must be 0 because not all users of the updatex api might have been
1284 * updated yet.
1285 */
1286 switch (extra_flags & RRD_FLAGS_LOCKING_MODE_MASK) {
1287 case RRD_FLAGS_LOCKING_MODE_NONE:
1288 return RRD_LOCK_NONE;
1289 case RRD_FLAGS_LOCKING_MODE_TRY:
1290 return RRD_LOCK_TRY;
1291 case RRD_FLAGS_LOCKING_MODE_BLOCK:
1292 return RRD_LOCK_BLOCK;
1293 case RRD_FLAGS_LOCKING_MODE_DEFAULT:
1294 return RRD_LOCK_DEFAULT;
1295 default:
1296 abort();
1297 }
1298 }