]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Add DoH and DoT stress tests, generate test configurations
authorMichal Nowak <mnowak@isc.org>
Mon, 19 Feb 2024 14:55:00 +0000 (15:55 +0100)
committerMichal Nowak <mnowak@isc.org>
Mon, 27 Jan 2025 15:17:39 +0000 (16:17 +0100)
Add DoH and DoT stress test jobs. The DoH scenario on FreeBSD is omitted
because all of Flamethrower's DoH queries time out on this platform.

Since the response rate of DoT queries is lower than that of DoH and
TCP, the expected TCP response rate for DoT jobs is lowered to 80%
(other jobs expect 90%).

Due to the large number of similar stress test configurations, the
"util/generate-stress-test-configs.py" script now generates them as part
of a downstream pipeline. The script is expected to be run exclusively
within the CI environment, which sources all environmental variables and
files.

This refactoring brought the following changes:

- To start a stress test immediately and not wait for artifacts of the
  autoreconf job, run the "autoreconf -fi" command as part of every job.

- Drop the BIND_STRESS_TEST_* variables as they were rarely used and
  conflicted with mode and platform selection in the configuration
  generator.

- Most pipelines now include a few short, randomly selected stress test
  jobs. To schedule all stress tests, set the ALL_BIND_STRESS_TESTS
  environmental variable, push a tag to CI, or run a scheduled pipeline.

- Set the BIND_STRESS_TESTS_RUN_TIME environmental variable to choose the
  stress test runtime (in minutes), and the BIND_STRESS_TESTS_RATE
  environmental variable to use a query rate different from the default.

- Job timeout is set to 30 minutes plus stress test runtime in minutes.

.gitlab-ci.yml
util/generate-stress-test-configs.py [new file with mode: 0755]

index dbfb9dffed4bda0e94a49ff13e01b51ec6f89ed6..8ab95af2956a182c248d888de55cfd2756a32fd0 100644 (file)
@@ -51,10 +51,6 @@ variables:
   # cross-testrun files as there is no need to use that feature in CI.
   PYTEST_ADDOPTS: "-p no:cacheprovider"
 
-  # Default platforms to run "stress" tests on
-  BIND_STRESS_TEST_OS: linux
-  BIND_STRESS_TEST_ARCH: amd64
-
   HYPOTHESIS_PROFILE: "ci"
 
 default:
@@ -1699,278 +1695,28 @@ shotgun:doh-get:
 
 .stress-test: &stress_test
   stage: performance
-  script:
-    - *configure
-    - *setup_interfaces
-    - make -j${BUILD_PARALLEL_JOBS:-1} -k all V=1
-    - make DESTDIR="${INSTALL_PATH}" install
-    - git clone --depth 1 https://gitlab.isc.org/isc-projects/bind9-qa.git
-    - cd bind9-qa/stress
-    - LD_LIBRARY_PATH="${INSTALL_PATH}/usr/local/lib" BIND_INSTALL_PATH="${INSTALL_PATH}/usr/local" WORKSPACE="${CI_PROJECT_DIR}" bash stress.sh
-  needs:
-    - job: autoreconf
-      artifacts: true
 
-.stress-test-long: &stress_test_long_job
-  <<: *stress_test
-  artifacts:
-    untracked: true
-    exclude:
-      - "output/ns4/*.dtq*"
-      - "output/ns4/large-delta-rpz*.local"
-      - "output/rpz_*"
-    expire_in: "1 week"
-    when: always
-  timeout: 2h
-
-.stress-test-short: &stress_test_short_job
-  <<: *stress_test
-  only:
-    - merge_requests
+generate-stress-test-configs:
+  <<: *base_image
+  <<: *default_triggering_rules
+  stage: precheck
+  script:
+    - util/generate-stress-test-configs.py > stress-test-configs.yml
   artifacts:
-    untracked: true
-    exclude:
-      - "output/ns4/*.dtq*"
-      - "output/ns4/large-delta-rpz*.local"
-      - "output/rpz_*"
-    when: always
-
-stress:short:authoritative:fedora:41:amd64:
-  <<: *fedora_41_amd64_image
-  <<: *linux_amd64
-  <<: *stress_test_short_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: authoritative
-    RATE: 10000
-    RUN_TIME: 15
-
-stress:short:recursive:fedora:41:amd64:
-  <<: *fedora_41_amd64_image
-  <<: *linux_amd64
-  <<: *stress_test_short_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: recursive
-    RATE: 10000
-    RUN_TIME: 15
-
-stress:short:rpz:fedora:41:amd64:
-  <<: *fedora_41_amd64_image
-  <<: *linux_amd64
-  <<: *stress_test_short_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: rpz
-    RATE: 1500
-    RUN_TIME: 15
-
-stress:short:authoritative:fedora:41:arm64:
-  <<: *fedora_41_arm64_image
-  <<: *linux_arm64
-  <<: *stress_test_short_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: authoritative
-    RATE: 10000
-    RUN_TIME: 15
-
-stress:short:recursive:fedora:41:arm64:
-  <<: *fedora_41_arm64_image
-  <<: *linux_arm64
-  <<: *stress_test_short_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: recursive
-    RATE: 10000
-    RUN_TIME: 15
-
-stress:short:rpz:fedora:41:arm64:
-  <<: *fedora_41_arm64_image
-  <<: *linux_arm64
-  <<: *stress_test_short_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: rpz
-    RATE: 1500
-    RUN_TIME: 15
-
-stress:short:authoritative:freebsd13:amd64:
-  <<: *freebsd_stress_amd64
-  <<: *stress_test_short_job
-  variables:
-    CC: clang
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/local/bin/flame
-    MODE: authoritative
-    RATE: 10000
-    RUN_TIME: 15
-
-stress:short:recursive:freebsd13:amd64:
-  <<: *freebsd_stress_amd64
-  <<: *stress_test_short_job
-  variables:
-    CC: clang
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/local/bin/flame
-    MODE: recursive
-    RATE: 10000
-    RUN_TIME: 15
-
-stress:short:rpz:freebsd13:amd64:
-  <<: *freebsd_stress_amd64
-  <<: *stress_test_short_job
-  variables:
-    CC: clang
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/local/bin/flame
-    MODE: rpz
-    RATE: 1500
-    RUN_TIME: 15
-
-stress:authoritative:fedora:41:amd64:
-  <<: *fedora_41_amd64_image
-  <<: *linux_amd64
-  <<: *stress_test_long_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: authoritative
-    RATE: 10000
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /authoritative/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i)
-
-stress:recursive:fedora:41:amd64:
-  <<: *fedora_41_amd64_image
-  <<: *linux_amd64
-  <<: *stress_test_long_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: recursive
-    RATE: 10000
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /recursive/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i)
-
-stress:rpz:fedora:41:amd64:
-  <<: *fedora_41_amd64_image
-  <<: *linux_amd64
-  <<: *stress_test_long_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: rpz
-    RATE: 1500
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /rpz/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i)
-
-stress:authoritative:fedora:41:arm64:
-  <<: *fedora_41_arm64_image
-  <<: *linux_arm64
-  <<: *stress_test_long_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: authoritative
-    RATE: 10000
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /authoritative/i && $BIND_STRESS_TEST_ARCH =~ /arm64/i)
-
-stress:recursive:fedora:41:arm64:
-  <<: *fedora_41_arm64_image
-  <<: *linux_arm64
-  <<: *stress_test_long_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: recursive
-    RATE: 10000
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /recursive/i && $BIND_STRESS_TEST_ARCH =~ /arm64/i)
-
-stress:rpz:fedora:41:arm64:
-  <<: *fedora_41_arm64_image
-  <<: *linux_arm64
-  <<: *stress_test_long_job
-  variables:
-    CC: gcc
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/bin/flame
-    MODE: rpz
-    RATE: 1500
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /rpz/i && $BIND_STRESS_TEST_ARCH =~ /arm64/i)
-
-stress:authoritative:freebsd13:amd64:
-  <<: *freebsd_stress_amd64
-  <<: *stress_test_long_job
-  variables:
-    CC: clang
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/local/bin/flame
-    MODE: authoritative
-    RATE: 10000
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /freebsd/i && $BIND_STRESS_TEST_MODE =~ /authoritative/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i)
-
-stress:recursive:freebsd13:amd64:
-  <<: *freebsd_stress_amd64
-  <<: *stress_test_long_job
-  variables:
-    CC: clang
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/local/bin/flame
-    MODE: recursive
-    RATE: 10000
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /freebsd/i && $BIND_STRESS_TEST_MODE =~ /recursive/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i)
+    paths:
+      - stress-test-configs.yml
+  needs: []
 
-stress:rpz:freebsd13:amd64:
-  <<: *freebsd_stress_amd64
-  <<: *stress_test_long_job
-  variables:
-    CC: clang
-    CFLAGS: "${CFLAGS_COMMON} -Og"
-    FLAME: /usr/local/bin/flame
-    MODE: rpz
-    RATE: 1500
-    RUN_TIME: 60
-  only:
-    variables:
-      - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /freebsd/i && $BIND_STRESS_TEST_MODE =~ /rpz/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i)
+stress-test-child-pipeline:
+  <<: *default_triggering_rules
+  stage: performance
+  trigger:
+    include:
+      - artifact: stress-test-configs.yml
+        job: generate-stress-test-configs
+  needs:
+    - job: generate-stress-test-configs
+      artifacts: true
 
 # git fsck operates over the whole repository and is sufficient to schedule it
 # only in one branch, preferably "main". GitLab's clone strategy prevents us
diff --git a/util/generate-stress-test-configs.py b/util/generate-stress-test-configs.py
new file mode 100755 (executable)
index 0000000..55b27c9
--- /dev/null
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+
+# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+#
+# SPDX-License-Identifier: MPL-2.0
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0.  If a copy of the MPL was not distributed with this
+# file, you can obtain one at https://mozilla.org/MPL/2.0/.
+#
+# See the COPYRIGHT file distributed with this work for additional
+# information regarding copyright ownership.
+
+import itertools
+import os
+import random
+
+import yaml
+
+with open(".gitlab-ci.yml", encoding="utf-8") as gitlab_ci_yml:
+    anchors = yaml.load(gitlab_ci_yml, Loader=yaml.Loader)
+
+# Mandatory environment variables
+ci_pipeline_source = os.environ["CI_PIPELINE_SOURCE"]
+install_path = os.environ["INSTALL_PATH"]
+project_directory = os.environ["CI_PROJECT_DIR"]
+
+# Optional environment variables
+all_bind_stress_tests = os.getenv("ALL_BIND_STRESS_TESTS")
+build_parallel_jobs = os.getenv("BUILD_PARALLEL_JOBS", "1")
+cflags_common = os.getenv("CFLAGS_COMMON", "")
+ci_commit_tag = os.getenv("CI_COMMIT_TAG")
+
+# Optional overrides for default test parameters
+env_traffic_rate = os.getenv("BIND_STRESS_TESTS_RATE")
+env_run_time = os.getenv("BIND_STRESS_TESTS_RUN_TIME")
+
+# Tags and scheduled pipelines produce longer jobs.
+if ci_commit_tag or ci_pipeline_source == "schedule":
+    all_bind_stress_tests = True
+    scenario = "long"
+    default_runtime = 60
+    expire_in = "1 week"
+else:
+    scenario = "short"
+    default_runtime = 15
+    expire_in = "1 day"
+
+ALL_MODES = "recursive", "authoritative", "rpz"
+ALL_PROTOCOLS = "tcp", "doh", "dot"
+ALL_PLATFORMS = ".fedora-41-amd64", ".fedora-41-arm64", ".freebsd-stress-amd64"
+
+# Unless ALL_BIND_STRESS_TESTS is set, or the pipeline is for a tag or a
+# schedule, randomly pick only two of three items from "modes", "protocols",
+# and "platforms" to make the "modes x protocols x platforms" matrix smaller.
+if all_bind_stress_tests is None and ci_commit_tag is None:
+    modes = random.sample(ALL_MODES, k=2)
+    protocols = random.sample(ALL_PROTOCOLS, k=2)
+    platforms = random.sample(ALL_PLATFORMS, k=2)
+else:
+    modes = ALL_MODES
+    protocols = ALL_PROTOCOLS
+    platforms = ALL_PLATFORMS
+
+jobs = {}
+
+for mode, protocol, platform in itertools.product(modes, protocols, platforms):
+    if "freebsd" in platform:
+        # Flamethrower-produced DoH queries on FreeBSD always time out. Skip
+        # DoH-on-FreeBSD jobs.
+        if protocol == "doh":
+            continue
+        job_platform = "freebsd:amd64"
+        compiler_binary = "clang"
+        flame_binary = "/usr/local/bin/flame"
+    else:
+        if "amd64" in platform:
+            job_platform = "linux:amd64"
+        else:
+            job_platform = "linux:arm64"
+        compiler_binary = "gcc"
+        flame_binary = "/usr/bin/flame"
+
+    if mode == "rpz":
+        default_traffic_rate = 1500
+    else:
+        default_traffic_rate = 10000
+
+    traffic_rate = int(env_traffic_rate or default_traffic_rate)
+    runtime = int(env_run_time or default_runtime)
+
+    expected_tcp_response_rate = 80 if protocol == "dot" else 90
+
+    job_definition = {
+        "stage": "test",
+        "variables": {
+            "CC": compiler_binary,
+            "CFLAGS": f"{cflags_common} -Og",
+            "EXPECTED_TCP_RESPONSE_RATE": expected_tcp_response_rate,
+            "FLAME": flame_binary,
+            "MODE": mode,
+            "PROTOCOL": f"{protocol} udp",
+            "RATE": traffic_rate,
+            "RUN_TIME": runtime,
+        },
+        "script": [
+            "autoreconf -fi",
+            *anchors[".configure"],
+            *anchors[".setup_interfaces"],
+            f"make -j{build_parallel_jobs} -k all V=1",
+            f'make DESTDIR="{install_path}" install',
+            "git clone --depth 1 https://gitlab.isc.org/isc-projects/bind9-qa.git",
+            "cd bind9-qa/stress",
+            f'export LD_LIBRARY_PATH="{install_path}/usr/local/lib"',
+            f'export BIND_INSTALL_PATH="{install_path}/usr/local"',
+            f'export WORKSPACE="{project_directory}"',
+            "bash stress.sh",
+        ],
+        "rules": [{"if": '$CI_PIPELINE_SOURCE == "parent_pipeline"'}],
+        "timeout": f"{runtime + 30} minutes",
+        "artifacts": {
+            "untracked": True,
+            "when": "always",
+            "expire_in": expire_in,
+            "exclude": [
+                "output/ns4/*.dtq*",
+                "output/ns4/large-delta-rpz*.local",
+                "output/rpz_*",
+            ],
+        },
+    }
+
+    job_definition |= anchors[platform]
+
+    job_name = f"stress:{scenario}:{mode}:{protocol}+udp:{job_platform}"
+    jobs[job_name] = job_definition
+
+
+print(yaml.dump(jobs, Dumper=yaml.Dumper))