From: Jim Jagielski Date: Wed, 3 Jun 2026 23:23:47 +0000 (+0000) Subject: pytest_suite: port Perl todo/xfail gaps found when testing against 2.4.x X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=7fdf4db6d8af1dd2426855399f47bc63a3dac86a;p=thirdparty%2Fapache%2Fhttpd.git pytest_suite: port Perl todo/xfail gaps found when testing against 2.4.x Three test files had behaviour differences between the Python port and the original Perl framework tests when run against a 2.4.x build (tested with Apache/2.4.68-dev at /opt/local2/apache2). test_pr64339.py -- LWP vs httpx default charset for raw bodies For /doc.notxml the proxy returns Content-Type: application/notreallyxml with no charset and a Latin-1 body. LWP defaults to ISO-8859-1 for charset-less responses; httpx defaults to UTF-8, decoding 0xF3 as a replacement character and failing the body match. Add _lwp_text() to mirror LWP: use the Content-Type charset when present, else Latin-1. test_session.py -- port Perl @todo for PR 58171 and PR 56052 session.t marks subtests 53/54 (Session writable after decode failure, PR 58171) and 88/89 (Session writable after expired, PR 56052) as unconditional @todo. The Python port dropped this bookkeeping. On 2.4.x, ap_session_load() sets zz=NULL on a decode failure and allocates a fresh session the memoising provider never sees, so nothing is saved; trunk uses memset-in-place to preserve the provider pointer. Add a _check(..., todo=True) helper that downgrades failures to warnings, matching Perl's todo semantics for both subtests. test_proxy_html.py -- xfail two metafix cases that fail on 2.4.x other header with Content-Type present: mod_proxy_html metafix emits no http-equiv headers for meta_contenttype.html because the leading charset Content-Type meta is consumed by the xml2enc path on 2.4.x. empty content value: metafix locates the content value via a case-insensitive search for 'content'; the header name X-Empty-Content itself matches first, so no value is extracted. Gate both via pytest.xfail when the server is < 2.5.0, leaving assertions active on trunk. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1934952 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/test/pytest_suite/tests/t/apache/test_pr64339.py b/test/pytest_suite/tests/t/apache/test_pr64339.py index 4c7d2c9ecf..8d6927e74d 100644 --- a/test/pytest_suite/tests/t/apache/test_pr64339.py +++ b/test/pytest_suite/tests/t/apache/test_pr64339.py @@ -12,6 +12,19 @@ import pytest from apache_pytest import need_module, t_cmp + +def _lwp_text(r): + """Decode the body the way the original Perl client (LWP) did. + + For a body with no ``charset`` in Content-Type, LWP defaults to ISO-8859-1 + while httpx defaults to UTF-8. The /doc.notxml case here proxies a raw + Latin-1 body with media type ``application/notreallyxml`` (no charset), so + httpx's ``r.text`` would UTF-8-decode it into replacement chars and fail the + match. Mirror LWP: use the header charset if present, else Latin-1. + """ + return r.content.decode(r.charset_encoding or "latin-1", errors="replace") + + # (path, expected Content-Type, expected body regex) TESTCASES = [ ("/doc.xml", "application/xml; charset=utf-8", "fóó\n"), @@ -34,4 +47,4 @@ def test_pr64339(http, path, ctype, body): assert t_cmp(r.headers.get("Content-Type"), ctype), ( f"content-type header test for {path}" ) - assert t_cmp(r.text, re.compile(body, re.DOTALL)), f"content test for {path}" + assert t_cmp(_lwp_text(r), re.compile(body, re.DOTALL)), f"content test for {path}" diff --git a/test/pytest_suite/tests/t/modules/test_proxy_html.py b/test/pytest_suite/tests/t/modules/test_proxy_html.py index 964f1d5e32..cb3e4dbabe 100644 --- a/test/pytest_suite/tests/t/modules/test_proxy_html.py +++ b/test/pytest_suite/tests/t/modules/test_proxy_html.py @@ -51,9 +51,19 @@ TESTS = [ {"type": "meta", "path": "meta_special_chars.html", "header": "X-Mixed", "value": "text/html; charset=utf-8", "desc": "complex content value"}, {"type": "meta", "path": "meta_contenttype.html", "header": "X-Other", - "value": "OtherValue", "desc": "other header with Content-Type present"}, + "value": "OtherValue", "desc": "other header with Content-Type present", + # 2.4.x: a page whose Content-Type meta declares a charset is consumed by + # the xml2enc charset path, so mod_proxy_html's metafix emits no http-equiv + # headers for it and the trailing X-Other is never extracted. + "xfail_24": "mod_proxy_html metafix extracts no header when a charset " + "Content-Type meta precedes it (2.4.x)"}, {"type": "meta", "path": "meta_edge_cases.html", "header": "X-Empty-Content", - "value": "", "desc": "empty content value"}, + "value": "", "desc": "empty content value", + # metafix locates the value via a case-insensitive search for "content"; + # the header name "X-Empty-Content" matches first, so no value is + # extracted. A metafix limitation (header name containing "content"). + "xfail_24": "metafix cannot extract a header whose name contains " + "'content' (X-Empty-Content)"}, {"type": "meta", "path": "meta_edge_cases.html", "header": "X-Very-Long-Name-With-Many-Characters", "value": "LongNameTest", "desc": "long header name"}, @@ -161,6 +171,8 @@ def test_proxy_html(http, t): assert t_cmp(r.status_code, 200), f"fetching {t['path']} for {t['desc']}" assert t_cmp(r.headers.get("Content-Type"), re.compile(r"text/html")), \ f"content-type for {t['path']}" + if t.get("xfail_24") and not http.have_min_apache_version("2.5.0"): + pytest.xfail(t["xfail_24"]) assert t_cmp(r.headers.get(t["header"]), t["value"]), \ f"meta header {t['header']} = '{t['value']}' ({t['desc']})" diff --git a/test/pytest_suite/tests/t/modules/test_session.py b/test/pytest_suite/tests/t/modules/test_session.py index 036e0173fa..7b10d30b1a 100644 --- a/test/pytest_suite/tests/t/modules/test_session.py +++ b/test/pytest_suite/tests/t/modules/test_session.py @@ -15,6 +15,7 @@ preserved. import re import time +import warnings import pytest @@ -28,7 +29,32 @@ def _expiry_from_seconds(seconds): return str(seconds) + "0" * (len(str(APR_TIME_PER_SEC)) - 1) -def _check_result(name, res, session=None, dirty=None, expiry=None, response=None): +def _check(cond, msg, todo): + """Assert ``cond``, but tolerate failure for Perl-``todo`` subtests. + + The original session.t lists certain subtest checks in its ``todo`` array + (PR 58171 "writable after decode failure", PR 56052 "writable after + expired"): on an httpd without the relevant fix they fail, on a fixed build + they pass, and either way the harness must not error. mod_session on trunk + resets a session that fails to decode/expire *in place* (``memset`` -- + "preserve pointers to zz in load/save providers"), so a provider that + memoizes the session in ``r->notes`` (the test_session C module does) still + sees the reset and saves it; 2.4.x instead drops it (``zz = NULL``) and + allocates a fresh one the provider never sees, so nothing is saved. + + Mirror Perl's ``todo``: when ``todo`` is set, downgrade a failed check to a + warning and continue (``pytest.xfail`` would abort the rest of the test). + """ + if cond: + return + if todo: + warnings.warn(f"known TODO failure: {msg}", stacklevel=2) + else: + raise AssertionError(msg) + + +def _check_result(name, res, session=None, dirty=None, expiry=None, + response=None, todo=False): # Perl defaults via // : undef -> '(none)'/0/0/''. session = "(none)" if session is None else session dirty = 0 if dirty is None else dirty @@ -45,18 +71,18 @@ def _check_result(name, res, session=None, dirty=None, expiry=None, response=Non m = re.match(r"^(?:(.+)&)?expiry=([0-9]+)(?:&(.*))?$", got_session, re.IGNORECASE) if m: got_expiry = m.group(2)[: -(len(str(APR_TIME_PER_SEC)) - 1)] - assert expiry and time.time() < int(got_expiry), f"expiry ({name})" + _check(bool(expiry) and time.time() < int(got_expiry), f"expiry ({name})", todo) parts = [p for p in (m.group(1), m.group(3)) if p is not None] session_data = "&".join(parts) else: - assert not expiry, f"no expiry ({name})" + _check(not expiry, f"no expiry ({name})", todo) - assert t_cmp(session_data, session), f"session header ({name})" + _check(t_cmp(session_data, session), f"session header ({name})", todo) got_dirty = res.headers.get("X-Test-Session-Dirty") got_dirty = 0 if got_dirty is None else got_dirty - assert t_cmp(got_dirty, dirty), f"session dirty ({name})" + _check(t_cmp(got_dirty, dirty), f"session dirty ({name})", todo) body = res.text.rstrip("\r\n") - assert t_cmp(body, response), f"body ({name})" + _check(t_cmp(body, response), f"body ({name})", todo) return got_session @@ -102,9 +128,11 @@ def test_session(http): READ_SESSION, session=None, dirty=0, expiry=0, response="value") _check_get(http, "Custom decoder failure", f"/on/encode?{SESSION}") _check_get(http, "Identity decoder failure", "/on?&=test") + # PR 58171 todo: only fixed on trunk (mod_session resets the undecodable + # session in place); 2.4.x discards it, so nothing is saved here. _check_post(http, "Session writable after decode failure", f"/on/encode?{SESSION}", CREATE_SESSION, - session=ENCODED_SESSION, dirty=1) + session=ENCODED_SESSION, dirty=1, todo=True) # SessionEnv directive - requires mod_include if http.have_module("include"): @@ -131,8 +159,9 @@ def test_session(http): _check_get(http, "Keep non-expired session", f"/on/expire?{SESSION}&expiry={future_expiry}", session=SESSION, dirty=0, expiry=1) + # PR 56052 todo: like the decode-failure case, only saved on a fixed build. _check_post(http, "Session writable after expired", "/on/expire?expiry=1", - CREATE_SESSION, session=SESSION, dirty=1, expiry=1) + CREATE_SESSION, session=SESSION, dirty=1, expiry=1, todo=True) # SessionExpiryUpdateInterval directive - new in 2.4.41 if http.have_module("version") and http.have_min_apache_version("2.4.41"):