From: Stefan Eissing
Date: Thu, 16 Feb 2023 13:09:16 +0000 (+0100)
Subject: test: add test for HTTP/2 corruption as reported in #10525
X-Git-Tag: curl-7_88_1~20
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4437e3e34460d26f24a5af12a3e7b8da5aa84a21;p=thirdparty%2Fcurl.git

test: add test for HTTP/2 corruption as reported in #10525

- adding test_02_20 for reproducing the situation
- using the recently released mod_h2 Apache module
- skipping the test if an older version is installed
- adding installation of the current mod_h2 to the github pytest workflow

This reproduces the error reliably (for me) on the lib/http2.c version
of curl 7.88.0, and passes with the recent curl master.

Closes #10534
---

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index d4e3085ab5..c12c335019 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -80,6 +80,15 @@ jobs:
         make install
       name: 'install nghttp2'
 
+    - run: |
+        git clone --depth=1 -b master https://github.com/icing/mod_h2
+        cd mod_h2
+        autoreconf -fi
+        ./configure PKG_CONFIG_PATH="$HOME/all/lib/pkgconfig"
+        make
+        sudo make install
+      name: 'install mod_h2'
+
     - uses: actions/checkout@v3
 
     - run: autoreconf -fi
diff --git a/tests/tests-httpd/test_02_download.py b/tests/tests-httpd/test_02_download.py
index 5d9f2d9496..60ceeae389 100644
--- a/tests/tests-httpd/test_02_download.py
+++ b/tests/tests-httpd/test_02_download.py
@@ -24,6 +24,8 @@
 #
 ###########################################################################
 #
+import difflib
+import filecmp
 import logging
 import os
 import pytest
@@ -43,21 +45,11 @@ class TestDownload:
         if env.have_h3():
             nghttpx.start_if_needed()
 
-    def _make_docs_file(self, docs_dir: str, fname: str, fsize: int):
-        fpath = os.path.join(docs_dir, fname)
-        data1k = 1024*'x'
-        flen = 0
-        with open(fpath, 'w') as fd:
-            while flen < fsize:
-                fd.write(data1k)
-                flen += len(data1k)
-        return flen
-
     @pytest.fixture(autouse=True, scope='class')
     def _class_scope(self, env, httpd):
-        self._make_docs_file(docs_dir=httpd.docs_dir, fname='data1.data', fsize=1024*1024)
-        self._make_docs_file(docs_dir=httpd.docs_dir, fname='data10.data', fsize=10*1024*1024)
-        self._make_docs_file(docs_dir=httpd.docs_dir, fname='data100.data', fsize=100*1024*1024)
+        env.make_data_file(indir=httpd.docs_dir, fname="data-100k", fsize=100*1024)
+        env.make_data_file(indir=httpd.docs_dir, fname="data-1m", fsize=1024*1024)
+        env.make_data_file(indir=httpd.docs_dir, fname="data-10m", fsize=10*1024*1024)
 
     # download 1 file
     @pytest.mark.parametrize("proto", ['http/1.1', 'h2', 'h3'])
@@ -173,7 +165,7 @@ class TestDownload:
     def test_02_08_1MB_serial(self, env: Env, httpd, nghttpx, repeat, proto):
         count = 20
-        urln = f'https://{env.authority_for(env.domain1, proto)}/data1.data?[0-{count-1}]'
+        urln = f'https://{env.authority_for(env.domain1, proto)}/data-1m?[0-{count-1}]'
         curl = CurlClient(env=env)
         r = curl.http_download(urls=[urln], alpn_proto=proto)
         assert r.exit_code == 0
@@ -183,7 +175,7 @@ class TestDownload:
     def test_02_09_1MB_parallel(self, env: Env, httpd, nghttpx, repeat, proto):
         count = 20
-        urln = f'https://{env.authority_for(env.domain1, proto)}/data1.data?[0-{count-1}]'
+        urln = f'https://{env.authority_for(env.domain1, proto)}/data-1m?[0-{count-1}]'
         curl = CurlClient(env=env)
         r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
             '--parallel'
@@ -195,7 +187,7 @@ class TestDownload:
     def test_02_10_10MB_serial(self, env: Env, httpd, nghttpx, repeat, proto):
         count = 20
-        urln = f'https://{env.authority_for(env.domain1, proto)}/data10.data?[0-{count-1}]'
+        urln = f'https://{env.authority_for(env.domain1, proto)}/data-10m?[0-{count-1}]'
         curl = CurlClient(env=env)
         r = curl.http_download(urls=[urln], alpn_proto=proto)
         assert r.exit_code == 0
@@ -205,10 +197,53 @@ class TestDownload:
     def test_02_11_10MB_parallel(self, env: Env, httpd, nghttpx, repeat, proto):
         count = 20
-        urln = f'https://{env.authority_for(env.domain1, proto)}/data10.data?[0-{count-1}]'
+        urln = f'https://{env.authority_for(env.domain1, proto)}/data-10m?[0-{count-1}]'
         curl = CurlClient(env=env)
         r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[
             '--parallel'
         ])
         assert r.exit_code == 0
         r.check_stats(count=count, exp_status=200)
+
+    def test_02_20_h2_small_frames(self, env: Env, httpd, repeat):
+        # Test case to reproduce content corruption as observed in
+        # https://github.com/curl/curl/issues/10525
+        # To reliably reproduce, we need an Apache httpd that supports
+        # setting smaller frame sizes. This is not released yet, we
+        # test if it works and back out if not.
+        httpd.set_extra_config(env.domain1, lines=[
+            f'H2MaxDataFrameLen 1024',
+        ])
+        assert httpd.stop()
+        if not httpd.start():
+            # no, not supported, bail out
+            httpd.set_extra_config(env.domain1, lines=None)
+            assert httpd.start()
+            pytest.skip(f'H2MaxDataFrameLen not supported')
+        # ok, make 100 downloads with 2 parallel running and they
+        # are expected to stumble into the issue when using `lib/http2.c`
+        # from curl 7.88.0
+        count = 100
+        urln = f'https://{env.authority_for(env.domain1, "h2")}/data-1m?[0-{count-1}]'
+        curl = CurlClient(env=env)
+        r = curl.http_download(urls=[urln], alpn_proto="h2", extra_args=[
+            '--parallel', '--parallel-max', '2'
+        ])
+        assert r.exit_code == 0
+        r.check_stats(count=count, exp_status=200)
+        srcfile = os.path.join(httpd.docs_dir, 'data-1m')
+        for i in range(count):
+            dfile = curl.download_file(i)
+            assert os.path.exists(dfile)
+            if not filecmp.cmp(srcfile, dfile, shallow=False):
+                diff = "".join(difflib.unified_diff(a=open(srcfile).readlines(),
+                                                    b=open(dfile).readlines(),
+                                                    fromfile=srcfile,
+                                                    tofile=dfile,
+                                                    n=1))
+                assert False, f'download {dfile} differs:\n{diff}'
+        # restore httpd defaults
+        httpd.set_extra_config(env.domain1, lines=None)
+        assert httpd.stop()
+        assert httpd.start()
+
diff --git a/tests/tests-httpd/testenv/curl.py b/tests/tests-httpd/testenv/curl.py
index a2b538e80f..7d0414d7cd 100644
--- a/tests/tests-httpd/testenv/curl.py
+++ b/tests/tests-httpd/testenv/curl.py
@@ -209,6 +209,13 @@ class CurlClient:
         self._rmrf(self._run_dir)
         self._mkpath(self._run_dir)
 
+    @property
+    def run_dir(self) -> str:
+        return self._run_dir
+
+    def download_file(self, i: int) -> str:
+        return os.path.join(self.run_dir, f'download_{i}.data')
+
     def _rmf(self, path):
         if os.path.exists(path):
             return os.remove(path)
@@ -232,8 +239,11 @@ class CurlClient:
         if extra_args is None:
             extra_args = []
         extra_args.extend([
-            '-o', 'download.data',
+            '-o', 'download_#1.data',
         ])
+        # remove any existing ones
+        for i in range(100):
+            self._rmf(self.download_file(i))
         if with_stats:
             extra_args.extend([
                 '-w', '%{json}\\n'
diff --git a/tests/tests-httpd/testenv/httpd.py b/tests/tests-httpd/testenv/httpd.py
index 24399454a5..21e10e65cc 100644
--- a/tests/tests-httpd/testenv/httpd.py
+++ b/tests/tests-httpd/testenv/httpd.py
@@ -307,12 +307,12 @@ class Httpd:
         ]))
 
     def _get_log_level(self):
-        if self.env.verbose > 3:
-            return 'trace2'
-        if self.env.verbose > 2:
-            return 'trace1'
-        if self.env.verbose > 1:
-            return 'debug'
+        #if self.env.verbose > 3:
+        #    return 'trace2'
+        #if self.env.verbose > 2:
+        #    return 'trace1'
+        #if self.env.verbose > 1:
+        #    return 'debug'
         return 'info'
 
     def _curltest_conf(self) -> List[str]:
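
For trying the reproduction by hand, outside the pytest harness, the essence of
test_02_20 above is the one mod_h2 directive that the test injects via
httpd.set_extra_config() plus a parallel HTTP/2 download of the same 1MB
resource. The sketch below only illustrates that idea and is not part of the
patch: the host name and output pattern are placeholder assumptions, while the
directive and the curl options mirror what the test case uses.

    # in the httpd config of the test vhost; needs a mod_h2 build recent
    # enough to know the directive (the skip logic in the test guards this)
    H2MaxDataFrameLen 1024

    # 100 transfers of the 1MB file, at most 2 running in parallel,
    # one output file per globbed URL
    curl --http2 --parallel --parallel-max 2 \
         -o 'download_#1.data' 'https://example.test/data-1m?[0-99]'

Each resulting download file can then be compared byte-for-byte against the
served data-1m file, which is what the filecmp/difflib check in the test
automates.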