git.ipfire.org Git - thirdparty/samba.git/commitdiff
s3:script: Avoid UnicodeDecodeError for samba-log-parser processing whole directory
author: Pavel Filipenský <pfilipensky@samba.org>
Wed, 9 Jul 2025 20:38:02 +0000 (22:38 +0200)
committer: Pavel Filipensky <pfilipensky@samba.org>
Tue, 9 Sep 2025 13:50:00 +0000 (13:50 +0000)
When the log directory contains gzip-compressed files such as
old/log.rpcd_spoolss-20250831.gz, we get an error (see the traceback below).
We want to ignore such files.

$ samba-log-parser --traceid 6 --merge-by-timestamp
Traceback (most recent call last):
  File "/usr/bin/samba-log-parser", line 382, in <module>
    main()
    ~~~~^^
  File "/usr/bin/samba-log-parser", line 311, in main
    process_file(
    ~~~~~~~~~~~~^
        record_list,
        ^^^^^^^^^^^^
    ...<3 lines>...
        options.traceid,
        ^^^^^^^^^^^^^^^^
    )
    ^
  File "/usr/bin/samba-log-parser", line 92, in process_file
    data = infile.readlines()
  File "<frozen codecs>", line 325, in decode
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte

Signed-off-by: Pavel Filipenský <pfilipensky@samba.org>
Reviewed-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Autobuild-User(master): Pavel Filipensky <pfilipensky@samba.org>
Autobuild-Date(master): Tue Sep  9 13:50:00 UTC 2025 on atb-devel-224

source3/script/samba-log-parser

index a07dfdb4a6c56da76c4c375d859641729c39dd9d..bd8cf580e052a98f272a82d4abc230a809208b78 100755 (executable)
@@ -69,7 +69,12 @@ from collections import defaultdict
 
 def process_file_no_traceid(record_list, fname):
     with open(fname, "r") as infile:
-        data = infile.readlines()
+        try:
+            data = infile.readlines()
+        except UnicodeDecodeError:
+            print("This file is not Unicode encoded: ", fname, file=sys.stderr)
+            return
+
     date = ""
     record_lines = []
 
@@ -89,7 +94,12 @@ def process_file_no_traceid(record_list, fname):
 
 def process_file(record_list, traceid_set, fname, opid, otraceid):
     with open(fname, "r") as infile:
-        data = infile.readlines()
+        try:
+            data = infile.readlines()
+        except UnicodeDecodeError:
+            print("This file is not Unicode encoded: ", fname, file=sys.stderr)
+            return
+
     pid = None
     traceid = 0
     traceid_prev = None