From: Tony Finch Date: Thu, 4 May 2023 14:26:13 +0000 (+0100) Subject: Help thread sanitizer to cope with liburcu X-Git-Tag: v9.19.14~52^2~3 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c377e0a9e3dd5dd087a2fb42368aaac5c82268e6;p=thirdparty%2Fbind9.git Help thread sanitizer to cope with liburcu All the places the qp-trie code was using `call_rcu()` needed `__tsan_release()` and `__tsan_acquire()` annotations, so add a couple of wrappers to encapsulate this pattern. With these wrappers, the tests run almost clean under thread sanitizer. The remaining problems are due to `rcu_barrier()` which can be suppressed using `.tsan-suppress`. It does not suppress the whole of `liburcu`, because we would like thread sanitizer to detect problems in `call_rcu()` callbacks, which are called from `liburcu`. The CI jobs have been updated to use `.tsan-suppress` by default, except for a special-case job that needs the additional suppressions in `.tsan-suppress-extra`. We might be able to get rid of some of this after liburcu gains support for thread sanitizer. Note: the `rcu_barrier()` suppression is not entirely effective: tsan sometimes reports races that originate inside `rcu_barrier()` but tsan has discarded the stack so it does not have the information required to suppress the report. These "races" can be made much easier to reproduce by adding `atexit_sleep_ms=1000` to `TSAN_OPTIONS`. The problem with tsan's short memory can be addressed by increasing `history_size`: when it is large enough (6 or 7) the `rcu_barrier()` stack usually survives long enough for suppression to work. --- diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bc8ca63f2df..5c332adab37 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -18,14 +18,20 @@ variables: CLANG_VERSION: 16 CLANG: "clang-${CLANG_VERSION}" SCAN_BUILD: "scan-build-${CLANG_VERSION}" - ASAN_SYMBOLIZER_PATH: "/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer" + LLVM_SYMBOLIZER: "/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer" CLANG_FORMAT: "clang-format-${CLANG_VERSION}" CFLAGS_COMMON: -fno-omit-frame-pointer -fno-optimize-sibling-calls -O1 -g -Wall -Wextra # Pass run-time flags to AddressSanitizer to get core dumps on error. ASAN_OPTIONS: abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1 - TSAN_OPTIONS_COMMON: "disable_coredump=0 second_deadlock_stack=1 history_size=7 log_exe_name=true log_path=tsan" + ASAN_SYMBOLIZER_PATH: "${LLVM_SYMBOLIZER}" + + TSAN_OPTIONS_COMMON: "disable_coredump=0 second_deadlock_stack=1 atexit_sleep_ms=1000 history_size=7 log_exe_name=true log_path=tsan" + TSAN_SYMBOLIZER: "external_symbolizer_path=/usr/bin/llvm-symbolizer" + TSAN_SUPPRESSIONS: "suppressions=${CI_PROJECT_DIR}/.tsan-suppress" + TSAN_OPTIONS_DEFAULT: "${TSAN_OPTIONS_COMMON} ${TSAN_SUPPRESSIONS} ${TSAN_SYMBOLIZER}" + UBSAN_OPTIONS: "halt_on_error=1:abort_on_error=1:disable_coredump=0" TARBALL_EXTENSION: xz @@ -1044,7 +1050,7 @@ gcc:tsan: system:gcc:tsan: variables: - TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer" + TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}" <<: *fedora_37_amd64_image <<: *system_test_tsan_job needs: @@ -1053,7 +1059,7 @@ system:gcc:tsan: unit:gcc:tsan: variables: - TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer" + TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}" <<: *fedora_37_amd64_image <<: *unit_test_tsan_job needs: @@ -1071,7 +1077,7 @@ clang:tsan: system:clang:tsan: variables: - TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer" + TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} ${TSAN_SUPPRESSIONS} external_symbolizer_path=${LLVM_SYMBOLIZER}" <<: *base_image <<: *system_test_tsan_job needs: @@ -1080,7 +1086,7 @@ system:clang:tsan: unit:clang:tsan: variables: - TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer suppressions=$CI_PROJECT_DIR/tsan-suppressions.txt" + TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} suppressions=$CI_PROJECT_DIR/.tsan-suppress-extra external_symbolizer_path=${LLVM_SYMBOLIZER}" <<: *base_image <<: *unit_test_tsan_job needs: @@ -1323,7 +1329,7 @@ respdiff-short:tsan: LDFLAGS: "-fsanitize=thread" EXTRA_CONFIGURE: "--enable-pthread-rwlock --without-jemalloc" MAX_DISAGREEMENTS_PERCENTAGE: "0.5" - TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer" + TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}" script: - bash respdiff.sh -s named -q "${PWD}/10k_a.txt" -c 3 -w "${PWD}/rspworkdir" "${CI_PROJECT_DIR}" "/usr/local/respdiff-reference-bind/sbin/named" after_script: @@ -1361,7 +1367,7 @@ respdiff-long:tsan: LDFLAGS: "-fsanitize=thread" EXTRA_CONFIGURE: "--enable-pthread-rwlock --without-jemalloc" MAX_DISAGREEMENTS_PERCENTAGE: "0.5" - TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer" + TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}" script: - bash respdiff.sh -s named -q "${PWD}/100k_mixed.txt" -c 3 -w "${PWD}/rspworkdir" "${CI_PROJECT_DIR}" "/usr/local/respdiff-reference-bind/sbin/named" after_script: diff --git a/.reuse/dep5 b/.reuse/dep5 index d4072e91fd7..eb708299f78 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -163,11 +163,12 @@ Files: **/.clang-format .gitlab-ci.yml .lgtm.yml .pylintrc + .tsan-suppress + .tsan-suppress-extra .uncrustify.cfg doc/misc/*.zoneopt doc/misc/options doc/misc/rndc.grammar - tsan-suppressions.txt sonar-project.properties Copyright: Internet Systems Consortium, Inc. ("ISC") License: CC0-1.0 diff --git a/.tsan-suppress b/.tsan-suppress new file mode 100644 index 00000000000..132a18e5c6f --- /dev/null +++ b/.tsan-suppress @@ -0,0 +1,3 @@ +# be more selective with liburcu +race:rcu_barrier +race:rcu_memb_barrier diff --git a/.tsan-suppress-extra b/.tsan-suppress-extra new file mode 100644 index 00000000000..767a2cc8e1c --- /dev/null +++ b/.tsan-suppress-extra @@ -0,0 +1,6 @@ +# Uninstrumented libraries +called_from_lib:libfstrm.so +called_from_lib:libdummyrpz.so +# be more selective with liburcu +race:rcu_barrier +race:rcu_memb_barrier diff --git a/bin/tests/system/run.sh.in b/bin/tests/system/run.sh.in index 084efc47d8c..81038f1eb1d 100644 --- a/bin/tests/system/run.sh.in +++ b/bin/tests/system/run.sh.in @@ -89,6 +89,7 @@ if ! $do_run; then LOG_FLAGS="$log_flags" \ TEST_LARGE_MAP="${TEST_LARGE_MAP}" \ CI_ENABLE_ALL_TESTS="${CI_ENABLE_ALL_TESTS}" \ + ${TSAN_OPTIONS:+"TSAN_OPTIONS=${TSAN_OPTIONS}"} \ ${VIRTUAL_ENV:+"VIRTUAL_ENV=${VIRTUAL_ENV}"} \ ${PERL5LIB:+"PERL5LIB=${PERL5LIB}"} \ make -e check diff --git a/lib/dns/qp.c b/lib/dns/qp.c index 05d9b76a1be..7633c4d5aa5 100644 --- a/lib/dns/qp.c +++ b/lib/dns/qp.c @@ -627,9 +627,8 @@ recycle(dns_qp_t *qp) { * asynchronous cleanup */ static void -reclaim_chunks_cb(struct rcu_head *rcu_head) { - qp_rcuctx_t *rcuctx = caa_container_of(rcu_head, qp_rcuctx_t, rcu_head); - __tsan_acquire(rcuctx); +reclaim_chunks_cb(struct rcu_head *arg) { + qp_rcuctx_t *rcuctx = isc_urcu_container(arg, qp_rcuctx_t, rcu_head); REQUIRE(QPRCU_VALID(rcuctx)); dns_qpmulti_t *multi = rcuctx->multi; REQUIRE(QPMULTI_VALID(multi)); @@ -711,8 +710,7 @@ reclaim_chunks(dns_qpmulti_t *multi) { } } - __tsan_release(rcuctx); - call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb); + isc_urcu_cleanup(rcuctx, rcu_head, reclaim_chunks_cb); LOG_STATS("qp will reclaim %u chunks", count); } @@ -1432,9 +1430,8 @@ dns_qp_destroy(dns_qp_t **qptp) { } static void -qpmulti_destroy_cb(struct rcu_head *rcu_head) { - qp_rcuctx_t *rcuctx = caa_container_of(rcu_head, qp_rcuctx_t, rcu_head); - __tsan_acquire(rcuctx); +qpmulti_destroy_cb(struct rcu_head *arg) { + qp_rcuctx_t *rcuctx = isc_urcu_container(arg, qp_rcuctx_t, rcu_head); REQUIRE(QPRCU_VALID(rcuctx)); dns_qpmulti_t *multi = rcuctx->multi; REQUIRE(QPMULTI_VALID(multi)); @@ -1473,8 +1470,7 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp) { .multi = multi, }; isc_mem_attach(qp->mctx, &rcuctx->mctx); - __tsan_release(rcuctx); - call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb); + isc_urcu_cleanup(rcuctx, rcu_head, qpmulti_destroy_cb); } /*********************************************************************** diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index c94f935d559..2bc92f38359 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -9583,8 +9583,8 @@ new_gluelist(isc_mem_t *mctx, dns_name_t *name) { static void free_gluelist_rcu(struct rcu_head *rcu_head) { - rbtdb_glue_t *glue = caa_container_of(rcu_head, rbtdb_glue_t, rcu_head); - __tsan_acquire(glue); + rbtdb_glue_t *glue = isc_urcu_container(rcu_head, rbtdb_glue_t, + rcu_head); free_gluelist(glue); } @@ -9600,8 +9600,7 @@ free_gluetable(rbtdb_version_t *rbtversion) { caa_container_of(node, rdatasetheader_t, wfs_node); rbtdb_glue_t *glue = rcu_xchg_pointer(&header->glue_list, NULL); - __tsan_release(glue); - call_rcu(&glue->rcu_head, free_gluelist_rcu); + isc_urcu_cleanup(glue, rcu_head, free_gluelist_rcu); } rcu_read_unlock(); } diff --git a/lib/isc/async.c b/lib/isc/async.c index db4388964e4..1afa2a3027a 100644 --- a/lib/isc/async.c +++ b/lib/isc/async.c @@ -107,8 +107,7 @@ isc__async_cb(uv_async_t *handle) { */ struct cds_wfcq_node *node, *next; __cds_wfcq_for_each_blocking_safe(&jobs.head, &jobs.tail, node, next) { - isc_job_t *job = caa_container_of(node, isc_job_t, wfcq_node); - __tsan_acquire(job); + isc_job_t *job = isc_urcu_container(node, isc_job_t, wfcq_node); job->cb(job->cbarg); diff --git a/lib/isc/include/isc/urcu.h b/lib/isc/include/isc/urcu.h index b43746ce632..0b828d92e6b 100644 --- a/lib/isc/include/isc/urcu.h +++ b/lib/isc/include/isc/urcu.h @@ -39,6 +39,35 @@ #pragma GCC diagnostic pop +/* + * Help thread sanitizer to understand `call_rcu()`: + * + * The `rcu_head` argument to `call_rcu()` is expected to be embedded + * in a structure defined by the caller, which is named `_rcuctx` in + * these macros. The callback function uses `caa_container_of()` to + * recover the `rcuctx` pointer from the `_rcu_head` pointer that is + * passed to the callback. + * + * We explain the ordering dependency to tsan by releasing `_rcuctx` + * pointer before `call_rcu()` and acquiring it in the callback + * funtion. We pass the outer `_rcuctx` pointer to the `__tsan_` + * barriers, because it should match a pointer that is known by tsan + * to have been returned by `malloc()`. + */ + +#define isc_urcu_cleanup(ptr, member, func) \ + { \ + __tsan_release(ptr); \ + call_rcu(&(ptr)->member, func); \ + } + +#define isc_urcu_container(ptr, type, member) \ + ({ \ + type *_ptr = caa_container_of(ptr, type, member); \ + __tsan_acquire(_ptr); \ + _ptr; \ + }) + #if defined(RCU_QSBR) /* diff --git a/lib/isc/quota.c b/lib/isc/quota.c index 4816333e1cc..5a05aa439e4 100644 --- a/lib/isc/quota.c +++ b/lib/isc/quota.c @@ -79,8 +79,7 @@ isc_quota_release(isc_quota_t *quota) { return; } - isc_job_t *job = caa_container_of(node, isc_job_t, wfcq_node); - __tsan_acquire(job); + isc_job_t *job = isc_urcu_container(node, isc_job_t, wfcq_node); job->cb(job->cbarg); } diff --git a/tsan-suppressions.txt b/tsan-suppressions.txt deleted file mode 100644 index 217d77c3883..00000000000 --- a/tsan-suppressions.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Uninstrumented library. -called_from_lib:libfstrm.so -called_from_lib:libdummyrpz.so