]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.9] gh-100001: Omit control characters in http.server stderr logs. (GH-100002)...
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 6 Dec 2022 10:22:12 +0000 (02:22 -0800)
committerGitHub <noreply@github.com>
Tue, 6 Dec 2022 10:22:12 +0000 (11:22 +0100)
* gh-100001: Omit control characters in http.server stderr logs. (GH-100002)

Replace control characters in http.server.BaseHTTPRequestHandler.log_message with an escaped \xHH sequence to avoid causing problems for the terminal the output is printed to.
(cherry picked from commit d8ab0a4dfa48f881b4ac9ab857d2e9de42f72828)

Co-authored-by: Gregory P. Smith <greg@krypto.org>
* also escape \s (backport of PR #100038).

* add versionadded and remove extra 'to'

Co-authored-by: Gregory P. Smith <greg@krypto.org>
Doc/library/http.server.rst
Lib/http/server.py
Lib/test/test_httpservers.py
Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst [new file with mode: 0644]

index 4aa10e26f61916eb630b8313b9ff5ff2b53b52fd..94647e99724b33b5a2a2a234511a4e7cae3607a3 100644 (file)
@@ -499,3 +499,12 @@ Security Considerations
 :class:`SimpleHTTPRequestHandler` will follow symbolic links when handling
 requests, this makes it possible for files outside of the specified directory
 to be served.
+
+Earlier versions of Python did not scrub control characters from the
+log messages emitted to stderr from ``python -m http.server`` or the
+default :class:`BaseHTTPRequestHandler` ``.log_message``
+implementation. This could allow remote clients connecting to your
+server to send nefarious control codes to your terminal.
+
+.. versionadded:: 3.9.16
+    scrubbing control characters from log messages
index 6bf9084341a65973662130a9038ee50d5a481d2a..cf8933c3dbd983154cb05c096a8349f002f1f997 100644 (file)
@@ -93,6 +93,7 @@ import email.utils
 import html
 import http.client
 import io
+import itertools
 import mimetypes
 import os
 import posixpath
@@ -563,6 +564,11 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
 
         self.log_message(format, *args)
 
+    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
+    _control_char_table = str.maketrans(
+            {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))})
+    _control_char_table[ord('\\')] = r'\\'
+
     def log_message(self, format, *args):
         """Log an arbitrary message.
 
@@ -578,12 +584,16 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
         The client ip and current date/time are prefixed to
         every message.
 
+        Unicode control characters are replaced with escaped hex
+        before writing the output to stderr.
+
         """
 
+        message = format % args
         sys.stderr.write("%s - - [%s] %s\n" %
                          (self.address_string(),
                           self.log_date_time_string(),
-                          format%args))
+                          message.translate(self._control_char_table)))
 
     def version_string(self):
         """Return the server software version string."""
index 4acf7a6fea448590ffbc2d22ea5c8b267c18239a..db9ee29e5fbe0034b7f5d2369f8af1765479c74d 100644 (file)
@@ -26,7 +26,7 @@ import time
 import datetime
 import threading
 from unittest import mock
-from io import BytesIO
+from io import BytesIO, StringIO
 
 import unittest
 from test import support
@@ -982,6 +982,25 @@ class BaseHTTPRequestHandlerTestCase(unittest.TestCase):
         match = self.HTTPResponseMatch.search(response)
         self.assertIsNotNone(match)
 
+    def test_unprintable_not_logged(self):
+        # We call the method from the class directly as our Socketless
+        # Handler subclass overrode it... nice for everything BUT this test.
+        self.handler.client_address = ('127.0.0.1', 1337)
+        log_message = BaseHTTPRequestHandler.log_message
+        with mock.patch.object(sys, 'stderr', StringIO()) as fake_stderr:
+            log_message(self.handler, '/foo')
+            log_message(self.handler, '/\033bar\000\033')
+            log_message(self.handler, '/spam %s.', 'a')
+            log_message(self.handler, '/spam %s.', '\033\x7f\x9f\xa0beans')
+        stderr = fake_stderr.getvalue()
+        self.assertNotIn('\033', stderr)  # non-printable chars are caught.
+        self.assertNotIn('\000', stderr)  # non-printable chars are caught.
+        lines = stderr.splitlines()
+        self.assertIn('/foo', lines[0])
+        self.assertIn(r'/\x1bbar\x00\x1b', lines[1])
+        self.assertIn('/spam a.', lines[2])
+        self.assertIn('/spam \\x1b\\x7f\\x9f\xa0beans.', lines[3])
+
     def test_http_1_1(self):
         result = self.send_typical_request(b'GET / HTTP/1.1\r\n\r\n')
         self.verify_http_server_response(result[0])
diff --git a/Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst b/Misc/NEWS.d/next/Security/2022-12-05-01-39-10.gh-issue-100001.uD05Fc.rst
new file mode 100644 (file)
index 0000000..a396e95
--- /dev/null
@@ -0,0 +1,6 @@
+``python -m http.server`` no longer allows terminal control characters sent
+within a garbage request to be printed to the stderr server log.
+
+This is done by changing the :mod:`http.server` :class:`BaseHTTPRequestHandler`
+``.log_message`` method to replace control characters with a ``\xHH`` hex escape
+before printing.