From 5eee4c2b2aebfd3c8f11d9722e49d838da4e4150 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Mon, 16 Jun 2025 09:59:13 +0200 Subject: [PATCH] checkpatch: use utf-8 match for spell checking MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The current code that checks for misspelling verifies, in a more complex regex, if $rawline matches [^\w]($misspellings)[^\w] Being $rawline a byte-string, a utf-8 character in $rawline can match the non-word-char [^\w]. E.g.: ./scripts/checkpatch.pl --git 81c2f059ab9 WARNING: 'ment' may be misspelled - perhaps 'meant'? #36: FILE: MAINTAINERS:14360: +M: Clément Léger ^^^^ Use a utf-8 version of $rawline for spell checking. Link: https://lkml.kernel.org/r/20250616-b4-checkpatch-upstream-v2-1-5600ce4a3b43@foss.st.com Signed-off-by: Antonio Borneo Signed-off-by: Clément Le Goffic Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 664f7b7a622c2..489b74d52abe7 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3502,9 +3502,10 @@ sub process { # Check for various typo / spelling mistakes if (defined($misspellings) && ($in_commit_log || $line =~ /^(?:\+|Subject:)/i)) { - while ($rawline =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) { + my $rawline_utf8 = decode("utf8", $rawline); + while ($rawline_utf8 =~ /(?:^|[^\w\-'`])($misspellings)(?:[^\w\-'`]|$)/gi) { my $typo = $1; - my $blank = copy_spacing($rawline); + my $blank = copy_spacing($rawline_utf8); my $ptr = substr($blank, 0, $-[1]) . "^" x length($typo); my $hereptr = "$hereline$ptr\n"; my $typo_fix = $spelling_fix{lc($typo)}; -- 2.47.2