]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Help thread sanitizer to cope with liburcu
authorTony Finch <fanf@isc.org>
Thu, 4 May 2023 14:26:13 +0000 (15:26 +0100)
committerTony Finch <fanf@isc.org>
Fri, 12 May 2023 19:48:31 +0000 (20:48 +0100)
All the places the qp-trie code was using `call_rcu()` needed
`__tsan_release()` and `__tsan_acquire()` annotations, so
add a couple of wrappers to encapsulate this pattern.

With these wrappers, the tests run almost clean under thread
sanitizer. The remaining problems are due to `rcu_barrier()`
which can be suppressed using `.tsan-suppress`. It does not
suppress the whole of `liburcu`, because we would like thread
sanitizer to detect problems in `call_rcu()` callbacks, which
are called from `liburcu`.

The CI jobs have been updated to use `.tsan-suppress` by
default, except for a special-case job that needs the
additional suppressions in `.tsan-suppress-extra`.

We might be able to get rid of some of this after liburcu gains
support for thread sanitizer.

Note: the `rcu_barrier()` suppression is not entirely effective:
tsan sometimes reports races that originate inside `rcu_barrier()`
but tsan has discarded the stack so it does not have the
information required to suppress the report. These "races" can
be made much easier to reproduce by adding `atexit_sleep_ms=1000`
to `TSAN_OPTIONS`. The problem with tsan's short memory can be
addressed by increasing `history_size`: when it is large enough
(6 or 7) the `rcu_barrier()` stack usually survives long enough
for suppression to work.

.gitlab-ci.yml
.reuse/dep5
.tsan-suppress [new file with mode: 0644]
.tsan-suppress-extra [new file with mode: 0644]
bin/tests/system/run.sh.in
lib/dns/qp.c
lib/dns/rbtdb.c
lib/isc/async.c
lib/isc/include/isc/urcu.h
lib/isc/quota.c
tsan-suppressions.txt [deleted file]

index bc8ca63f2df2a5d4a3fee97c6566381398406009..5c332adab370724bdda301fbbdcc5d33a8149582 100644 (file)
@@ -18,14 +18,20 @@ variables:
   CLANG_VERSION: 16
   CLANG: "clang-${CLANG_VERSION}"
   SCAN_BUILD: "scan-build-${CLANG_VERSION}"
-  ASAN_SYMBOLIZER_PATH: "/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer"
+  LLVM_SYMBOLIZER: "/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer"
   CLANG_FORMAT: "clang-format-${CLANG_VERSION}"
 
   CFLAGS_COMMON: -fno-omit-frame-pointer -fno-optimize-sibling-calls -O1 -g -Wall -Wextra
 
   # Pass run-time flags to AddressSanitizer to get core dumps on error.
   ASAN_OPTIONS: abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1
-  TSAN_OPTIONS_COMMON: "disable_coredump=0 second_deadlock_stack=1 history_size=7 log_exe_name=true log_path=tsan"
+  ASAN_SYMBOLIZER_PATH: "${LLVM_SYMBOLIZER}"
+
+  TSAN_OPTIONS_COMMON: "disable_coredump=0 second_deadlock_stack=1 atexit_sleep_ms=1000 history_size=7 log_exe_name=true log_path=tsan"
+  TSAN_SYMBOLIZER: "external_symbolizer_path=/usr/bin/llvm-symbolizer"
+  TSAN_SUPPRESSIONS: "suppressions=${CI_PROJECT_DIR}/.tsan-suppress"
+  TSAN_OPTIONS_DEFAULT: "${TSAN_OPTIONS_COMMON} ${TSAN_SUPPRESSIONS} ${TSAN_SYMBOLIZER}"
+
   UBSAN_OPTIONS: "halt_on_error=1:abort_on_error=1:disable_coredump=0"
 
   TARBALL_EXTENSION: xz
@@ -1044,7 +1050,7 @@ gcc:tsan:
 
 system:gcc:tsan:
   variables:
-    TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer"
+    TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}"
   <<: *fedora_37_amd64_image
   <<: *system_test_tsan_job
   needs:
@@ -1053,7 +1059,7 @@ system:gcc:tsan:
 
 unit:gcc:tsan:
   variables:
-    TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer"
+    TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}"
   <<: *fedora_37_amd64_image
   <<: *unit_test_tsan_job
   needs:
@@ -1071,7 +1077,7 @@ clang:tsan:
 
 system:clang:tsan:
   variables:
-    TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer"
+    TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} ${TSAN_SUPPRESSIONS} external_symbolizer_path=${LLVM_SYMBOLIZER}"
   <<: *base_image
   <<: *system_test_tsan_job
   needs:
@@ -1080,7 +1086,7 @@ system:clang:tsan:
 
 unit:clang:tsan:
   variables:
-    TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/lib/llvm-${CLANG_VERSION}/bin/llvm-symbolizer suppressions=$CI_PROJECT_DIR/tsan-suppressions.txt"
+    TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} suppressions=$CI_PROJECT_DIR/.tsan-suppress-extra  external_symbolizer_path=${LLVM_SYMBOLIZER}"
   <<: *base_image
   <<: *unit_test_tsan_job
   needs:
@@ -1323,7 +1329,7 @@ respdiff-short:tsan:
     LDFLAGS: "-fsanitize=thread"
     EXTRA_CONFIGURE: "--enable-pthread-rwlock --without-jemalloc"
     MAX_DISAGREEMENTS_PERCENTAGE: "0.5"
-    TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer"
+    TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}"
   script:
     - bash respdiff.sh -s named -q "${PWD}/10k_a.txt" -c 3 -w "${PWD}/rspworkdir" "${CI_PROJECT_DIR}" "/usr/local/respdiff-reference-bind/sbin/named"
   after_script:
@@ -1361,7 +1367,7 @@ respdiff-long:tsan:
     LDFLAGS: "-fsanitize=thread"
     EXTRA_CONFIGURE: "--enable-pthread-rwlock --without-jemalloc"
     MAX_DISAGREEMENTS_PERCENTAGE: "0.5"
-    TSAN_OPTIONS: "${TSAN_OPTIONS_COMMON} external_symbolizer_path=/usr/bin/llvm-symbolizer"
+    TSAN_OPTIONS: "${TSAN_OPTIONS_DEFAULT}"
   script:
     - bash respdiff.sh -s named -q "${PWD}/100k_mixed.txt" -c 3 -w "${PWD}/rspworkdir" "${CI_PROJECT_DIR}" "/usr/local/respdiff-reference-bind/sbin/named"
   after_script:
index d4072e91fd7e24467a0aee7e4dd92c21f1dacfb8..eb708299f7896629d963a3db4aed07763842c711 100644 (file)
@@ -163,11 +163,12 @@ Files: **/.clang-format
        .gitlab-ci.yml
        .lgtm.yml
        .pylintrc
+       .tsan-suppress
+       .tsan-suppress-extra
        .uncrustify.cfg
        doc/misc/*.zoneopt
        doc/misc/options
        doc/misc/rndc.grammar
-       tsan-suppressions.txt
        sonar-project.properties
 Copyright: Internet Systems Consortium, Inc. ("ISC")
 License: CC0-1.0
diff --git a/.tsan-suppress b/.tsan-suppress
new file mode 100644 (file)
index 0000000..132a18e
--- /dev/null
@@ -0,0 +1,3 @@
+# be more selective with liburcu
+race:rcu_barrier
+race:rcu_memb_barrier
diff --git a/.tsan-suppress-extra b/.tsan-suppress-extra
new file mode 100644 (file)
index 0000000..767a2cc
--- /dev/null
@@ -0,0 +1,6 @@
+# Uninstrumented libraries
+called_from_lib:libfstrm.so
+called_from_lib:libdummyrpz.so
+# be more selective with liburcu
+race:rcu_barrier
+race:rcu_memb_barrier
index 084efc47d8cf73e233eabe771c4c0ca4a940aed6..81038f1eb1d4ae3021697667e67573766c74f267 100644 (file)
@@ -89,6 +89,7 @@ if ! $do_run; then
         LOG_FLAGS="$log_flags" \
         TEST_LARGE_MAP="${TEST_LARGE_MAP}" \
         CI_ENABLE_ALL_TESTS="${CI_ENABLE_ALL_TESTS}" \
+       ${TSAN_OPTIONS:+"TSAN_OPTIONS=${TSAN_OPTIONS}"} \
        ${VIRTUAL_ENV:+"VIRTUAL_ENV=${VIRTUAL_ENV}"} \
        ${PERL5LIB:+"PERL5LIB=${PERL5LIB}"} \
         make -e check
index 05d9b76a1be5b84319061b52d0a866285c0861f8..7633c4d5aa56bd1d9bb833efabb55086668de0a2 100644 (file)
@@ -627,9 +627,8 @@ recycle(dns_qp_t *qp) {
  * asynchronous cleanup
  */
 static void
-reclaim_chunks_cb(struct rcu_head *rcu_head) {
-       qp_rcuctx_t *rcuctx = caa_container_of(rcu_head, qp_rcuctx_t, rcu_head);
-       __tsan_acquire(rcuctx);
+reclaim_chunks_cb(struct rcu_head *arg) {
+       qp_rcuctx_t *rcuctx = isc_urcu_container(arg, qp_rcuctx_t, rcu_head);
        REQUIRE(QPRCU_VALID(rcuctx));
        dns_qpmulti_t *multi = rcuctx->multi;
        REQUIRE(QPMULTI_VALID(multi));
@@ -711,8 +710,7 @@ reclaim_chunks(dns_qpmulti_t *multi) {
                }
        }
 
-       __tsan_release(rcuctx);
-       call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb);
+       isc_urcu_cleanup(rcuctx, rcu_head, reclaim_chunks_cb);
 
        LOG_STATS("qp will reclaim %u chunks", count);
 }
@@ -1432,9 +1430,8 @@ dns_qp_destroy(dns_qp_t **qptp) {
 }
 
 static void
-qpmulti_destroy_cb(struct rcu_head *rcu_head) {
-       qp_rcuctx_t *rcuctx = caa_container_of(rcu_head, qp_rcuctx_t, rcu_head);
-       __tsan_acquire(rcuctx);
+qpmulti_destroy_cb(struct rcu_head *arg) {
+       qp_rcuctx_t *rcuctx = isc_urcu_container(arg, qp_rcuctx_t, rcu_head);
        REQUIRE(QPRCU_VALID(rcuctx));
        dns_qpmulti_t *multi = rcuctx->multi;
        REQUIRE(QPMULTI_VALID(multi));
@@ -1473,8 +1470,7 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp) {
                .multi = multi,
        };
        isc_mem_attach(qp->mctx, &rcuctx->mctx);
-       __tsan_release(rcuctx);
-       call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb);
+       isc_urcu_cleanup(rcuctx, rcu_head, qpmulti_destroy_cb);
 }
 
 /***********************************************************************
index c94f935d55998d0c7fe0358a69349838035bf916..2bc92f38359ce7cb19d0f6fc0a30895e629c356c 100644 (file)
@@ -9583,8 +9583,8 @@ new_gluelist(isc_mem_t *mctx, dns_name_t *name) {
 
 static void
 free_gluelist_rcu(struct rcu_head *rcu_head) {
-       rbtdb_glue_t *glue = caa_container_of(rcu_head, rbtdb_glue_t, rcu_head);
-       __tsan_acquire(glue);
+       rbtdb_glue_t *glue = isc_urcu_container(rcu_head, rbtdb_glue_t,
+                                               rcu_head);
 
        free_gluelist(glue);
 }
@@ -9600,8 +9600,7 @@ free_gluetable(rbtdb_version_t *rbtversion) {
                        caa_container_of(node, rdatasetheader_t, wfs_node);
                rbtdb_glue_t *glue = rcu_xchg_pointer(&header->glue_list, NULL);
 
-               __tsan_release(glue);
-               call_rcu(&glue->rcu_head, free_gluelist_rcu);
+               isc_urcu_cleanup(glue, rcu_head, free_gluelist_rcu);
        }
        rcu_read_unlock();
 }
index db4388964e4573c9311f6739ee8d66c345d77cda..1afa2a3027af3264279cd6b149b32b5d39692bff 100644 (file)
@@ -107,8 +107,7 @@ isc__async_cb(uv_async_t *handle) {
         */
        struct cds_wfcq_node *node, *next;
        __cds_wfcq_for_each_blocking_safe(&jobs.head, &jobs.tail, node, next) {
-               isc_job_t *job = caa_container_of(node, isc_job_t, wfcq_node);
-               __tsan_acquire(job);
+               isc_job_t *job = isc_urcu_container(node, isc_job_t, wfcq_node);
 
                job->cb(job->cbarg);
 
index b43746ce6324f6cf269bd40b8b2c92d1f2ac22cd..0b828d92e6b6b0ac198f236388a973f88aca2e8c 100644 (file)
 
 #pragma GCC diagnostic pop
 
+/*
+ * Help thread sanitizer to understand `call_rcu()`:
+ *
+ * The `rcu_head` argument to `call_rcu()` is expected to be embedded
+ * in a structure defined by the caller, which is named `_rcuctx` in
+ * these macros. The callback function uses `caa_container_of()` to
+ * recover the `rcuctx` pointer from the `_rcu_head` pointer that is
+ * passed to the callback.
+ *
+ * We explain the ordering dependency to tsan by releasing `_rcuctx`
+ * pointer before `call_rcu()` and acquiring it in the callback
+ * funtion. We pass the outer `_rcuctx` pointer to the `__tsan_`
+ * barriers, because it should match a pointer that is known by tsan
+ * to have been returned by `malloc()`.
+ */
+
+#define isc_urcu_cleanup(ptr, member, func)     \
+       {                                       \
+               __tsan_release(ptr);            \
+               call_rcu(&(ptr)->member, func); \
+       }
+
+#define isc_urcu_container(ptr, type, member)                     \
+       ({                                                        \
+               type *_ptr = caa_container_of(ptr, type, member); \
+               __tsan_acquire(_ptr);                             \
+               _ptr;                                             \
+       })
+
 #if defined(RCU_QSBR)
 
 /*
index 4816333e1cc476c744591ebe4f628ebd92a469fd..5a05aa439e48d3d85046a933e1c0d39f1b3947f7 100644 (file)
@@ -79,8 +79,7 @@ isc_quota_release(isc_quota_t *quota) {
                return;
        }
 
-       isc_job_t *job = caa_container_of(node, isc_job_t, wfcq_node);
-       __tsan_acquire(job);
+       isc_job_t *job = isc_urcu_container(node, isc_job_t, wfcq_node);
        job->cb(job->cbarg);
 }
 
diff --git a/tsan-suppressions.txt b/tsan-suppressions.txt
deleted file mode 100644 (file)
index 217d77c..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-# Uninstrumented library.
-called_from_lib:libfstrm.so
-called_from_lib:libdummyrpz.so