From: Noah Date: Wed, 7 Jul 2021 18:40:10 +0000 (-0400) Subject: debuginfod: PR27711 - Use -I/-X regexes during groom phase X-Git-Tag: elfutils-0.186~65 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0ae9b791b47cdee92ac7221e3eead79c83a64a40;p=thirdparty%2Felfutils.git debuginfod: PR27711 - Use -I/-X regexes during groom phase The debuginfod -I/-X regexes operate during traversal to identify those files in need of scanning. The regexes are not used during grooming. This means that if from run to run, the regex changes so that formerly indexed files are excluded from traversal, the data is still retained in the index. This is both good and bad. On one hand, if the underlying data is still available, grooming will preserve the data, and let clients ask for it. On the other hand, if the growing index size is a problem, and one wishes to age no-longer-regex-matching index data out, there is no way. Let's add a debuginfod flag to use regexes during grooming. Specifically, in groom(), where the stat() test exists, also check for regex matching as in scan_source_paths(). Treat failure of the regex the same way as though the file didn't exist. Signed-off-by: Noah Sanci --- diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index d9d117373..a4f20a788 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,11 @@ +2021-07-01 Noah Sanci + + PR27711 + * debuginfod.cxx (options): Add --regex-groom, -r option. + (regex_groom): New static bool defaults to false. + (parse_opt): Handle 'r' option by setting regex_groom to true. + (groom): Introduce and use reg_include and reg_exclude. 
+ 2021-06-18 Mark Wielaard * debuginfod-client.c (debuginfod_begin): Don't use client if diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 543044c6f..4f7fd2d5a 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -360,6 +360,7 @@ static const struct argp_option options[] = { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 }, { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 }, { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 }, + { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0}, #define ARGP_KEY_FDCACHE_FDS 0x1001 { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 }, #define ARGP_KEY_FDCACHE_MBS 0x1002 @@ -407,6 +408,7 @@ static map scan_archives; static vector extra_ddl; static regex_t file_include_regex; static regex_t file_exclude_regex; +static bool regex_groom = false; static bool traverse_logical; static long fdcache_fds; static long fdcache_mbs; @@ -527,6 +529,9 @@ parse_opt (int key, char *arg, if (rc != 0) argp_failure(state, 1, EINVAL, "regular expression"); break; + case 'r': + regex_groom = true; + break; case ARGP_KEY_FDCACHE_FDS: fdcache_fds = atol (arg); break; @@ -3249,8 +3254,11 @@ void groom() int64_t fileid = sqlite3_column_int64 (files, 1); const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: ""); struct stat s; + bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0); + bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0); + rc = stat(filename, &s); - if (rc < 0 || (mtime != (int64_t) s.st_mtime)) + if ( (regex_groom && reg_exclude && !reg_include) || rc < 0 || (mtime != (int64_t) s.st_mtime) ) { if (verbose > 2) obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl; @@ -3261,7 +3269,6 @@ void groom() } else inc_metric("groomed_total", "decision", "fresh"); - if (sigusr1 != 
forced_rescan_count) // stop early if scan triggered break; } diff --git a/doc/debuginfod.8 b/doc/debuginfod.8 index 1ba42cf65..1adf703af 100644 --- a/doc/debuginfod.8 +++ b/doc/debuginfod.8 @@ -159,6 +159,9 @@ scan, independent of the rescan time (including if it was zero), interrupting a groom pass (if any). .TP +.B "\-r" +Apply the -I and -X regexes during groom cycles, so that files excluded by the regexes are removed from the index. These parameters are in addition to what normally qualifies a file for grooming, not a replacement. + .B "\-g SECONDS" "\-\-groom\-time=SECONDS" Set the groom time for the index database. This is the amount of time the grooming thread will wait after finishing a grooming pass before diff --git a/tests/ChangeLog b/tests/ChangeLog index 7b493c99a..3a30e06b5 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2021-07-01 Noah Sanci + + PR27711 + * run-debuginfod-find.sh: Added test case for grooming the database + using regexes. + 2021-07-08 Mark Wielaard * Makefile.am (EXTRA_DIST): Fix typo testfile-largealign.bz2 was diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh index 74a5ceff0..a4c1a13a8 100755 --- a/tests/run-debuginfod-find.sh +++ b/tests/run-debuginfod-find.sh @@ -36,13 +36,14 @@ export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache PID1=0 PID2=0 PID3=0 +PID4=0 cleanup() { - if [ $PID1 -ne 0 ]; then kill $PID1 || true; wait $PID1; fi - if [ $PID2 -ne 0 ]; then kill $PID2 || true; wait $PID2; fi - if [ $PID3 -ne 0 ]; then kill $PID3 || true; wait $PID3; fi - + if [ $PID1 -ne 0 ]; then kill $PID1; wait $PID1; fi + if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi + if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi + if [ $PID4 -ne 0 ]; then kill $PID4; wait $PID4; fi rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp* exit_cleanup } @@ -293,7 +294,8 @@ kill -USR1 $PID1 wait_ready $PORT1 'thread_work_total{role="traverse"}' 3 wait_ready $PORT1 
'thread_work_pending{role="scan"}' 0 wait_ready $PORT1 'thread_busy{role="scan"}' 0 - +cp $DB $DB.backup +tempfiles $DB.backup # Rerun same tests for the prog2 binary filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v debuginfo $BUILDID2 2>vlog` cmp $filename F/prog2 @@ -710,4 +712,29 @@ DEBUGINFOD_URLS="file://${PWD}/mocktree/" filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd /my/path/main.c` cmp $filename ${local_dir}/main.c -exit 0 +######################################################################## +## PR27711 +# Test to ensure that the --include="^$" --exclude=".*" options remove all files from a database backup +while true; do + PORT3=`expr '(' $RANDOM % 1000 ')' + 9000` + ss -atn | fgrep ":$PORT3" || break +done +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS="http://127.0.0.1:$PORT3/" ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT3 -t0 -g0 --regex-groom --include="^$" --exclude=".*" -d $DB.backup > vlog$PORT3 2>&1 & +PID4=$! +wait_ready $PORT3 'ready' 1 +tempfiles vlog$PORT3 +errfiles vlog$PORT3 + +kill -USR2 $PID4 +wait_ready $PORT3 'thread_work_total{role="groom"}' 1 +wait_ready $PORT3 'groom{statistic="archive d/e"}' 0 +wait_ready $PORT3 'groom{statistic="archive sdef"}' 0 +wait_ready $PORT3 'groom{statistic="archive sref"}' 0 +wait_ready $PORT3 'groom{statistic="buildids"}' 0 +wait_ready $PORT3 'groom{statistic="file d/e"}' 0 +wait_ready $PORT3 'groom{statistic="file s"}' 0 +wait_ready $PORT3 'groom{statistic="files scanned (#)"}' 0 +wait_ready $PORT3 'groom{statistic="files scanned (mb)"}' 0 + +kill $PID4 +exit 0;