From f636bd3ec08271e79feea6cdd48e48da2af5b1d8 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Pavel=20Filipensk=C3=BD?=
Date: Wed, 9 Jul 2025 22:38:02 +0200
Subject: [PATCH] s3:script: Avoid UnicodeDecodeError for samba-log-parser processing whole directory
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

When the log directory contains gzipped files such as
old/log.rpcd_spoolss-20250831.gz, we get an error.
We want to ignore such files.

$ samba-log-parser --traceid 6 --merge-by-timestamp
Traceback (most recent call last):
  File "/usr/bin/samba-log-parser", line 382, in <module>
    main()
    ~~~~^^
  File "/usr/bin/samba-log-parser", line 311, in main
    process_file(
    ~~~~~~~~~~~~^
        record_list,
        ^^^^^^^^^^^^
    ...<3 lines>...
        options.traceid,
        ^^^^^^^^^^^^^^^^
    )
    ^
  File "/usr/bin/samba-log-parser", line 92, in process_file
    data = infile.readlines()
  File "<frozen codecs>", line 325, in decode
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte

Signed-off-by: Pavel Filipenský
Reviewed-by: Douglas Bagnall

Autobuild-User(master): Pavel Filipensky
Autobuild-Date(master): Tue Sep 9 13:50:00 UTC 2025 on atb-devel-224
---
 source3/script/samba-log-parser | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/source3/script/samba-log-parser b/source3/script/samba-log-parser
index a07dfdb4a6c..bd8cf580e05 100755
--- a/source3/script/samba-log-parser
+++ b/source3/script/samba-log-parser
@@ -69,7 +69,12 @@ from collections import defaultdict
 
 def process_file_no_traceid(record_list, fname):
     with open(fname, "r") as infile:
-        data = infile.readlines()
+        try:
+            data = infile.readlines()
+        except UnicodeDecodeError:
+            print("This file is not Unicode encoded: ", fname, file=sys.stderr)
+            return
+
     date = ""
     record_lines = []
 
@@ -89,7 +94,12 @@ def process_file_no_traceid(record_list, fname):
 
 def process_file(record_list, traceid_set, fname, opid, otraceid):
     with open(fname, "r") as infile:
-        data = infile.readlines()
+        try:
+            data = infile.readlines()
+        except UnicodeDecodeError:
+            print("This file is not Unicode encoded: ", fname, file=sys.stderr)
+            return
+
     pid = None
     traceid = 0
     traceid_prev = None
-- 
2.47.3