From: Tom de Vries Date: Sun, 6 Oct 2024 05:59:48 +0000 (+0200) Subject: [gdb/contrib] Add spellcheck.sh X-Git-Tag: gdb-16-branchpoint~729 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=67eca1ccc1ad5237403dc151eb91f5e506dea0c4;p=thirdparty%2Fbinutils-gdb.git [gdb/contrib] Add spellcheck.sh I came across a table containing common misspellings [1], and wrote a script to detect and correct these misspellings. The table also contains entries that have alternatives, like this: ... addres->address, adders ... and for those the script prints a TODO instead. The script downloads the webpage containing the table, extracts the table and caches it in .git/wikipedia-common-misspellings.txt to prevent downloading it over and over again. Example usage: ... $ gdb/contrib/spellcheck.sh gdb* ... ChangeLog files are silently skipped. Checked with shellcheck. Tested on x86_64-linux, by running it on the gdb* dirs on doing a build and test run. The results of running it are in the two following patches. Reviewed-By: Andrew Burgess Approved-By: Tom Tromey [1] https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines --- diff --git a/gdb/contrib/spellcheck.sh b/gdb/contrib/spellcheck.sh new file mode 100755 index 00000000000..e7db6217d45 --- /dev/null +++ b/gdb/contrib/spellcheck.sh @@ -0,0 +1,287 @@ +#!/bin/bash + +# Copyright (C) 2024 Free Software Foundation, Inc. +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see . + +# Script to auto-correct common spelling mistakes. +# +# Example usage: +# $ ./gdb/contrib/spellcheck.sh gdb* + +scriptdir=$(cd "$(dirname "$0")" || exit; pwd -P) + +url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines +cache_dir=$scriptdir/../../.git +cache_file=wikipedia-common-misspellings.txt +dictionary=$cache_dir/$cache_file + +# Separators: space, slash, tab. +grep_separator=" |/| " +sed_separator=" \|/\|\t" + +usage () +{ + echo "usage: $(basename "$0") +" +} + +make_absolute () +{ + local arg + arg="$1" + + case "$arg" in + /*) + ;; + *) + arg=$(pwd -P)/"$arg" + ;; + esac + + echo "$arg" +} + +parse_args () +{ + local files + files=$(mktemp) + trap 'rm -f "$files"' EXIT + + if [ $# -eq -0 ]; then + usage + exit 1 + fi + + local arg + for arg in "$@"; do + if [ -f "$arg" ]; then + arg=$(make_absolute "$arg") + readlink -e "$arg" \ + >> "$files" + elif [ -d "$arg" ]; then + arg=$(make_absolute "$arg") + local f + find "$arg" -type f -exec readlink -e {} \; \ + >> "$files" + else + echo "Not a file or directory: $arg" + exit 1 + fi + done + + mapfile -t unique_files \ + < <(sort -u "$files" \ + | grep -v ChangeLog) + + rm -f "$files" + trap "" EXIT +} + +get_dictionary () +{ + if [ -f "$dictionary" ]; then + return + fi + + local webpage + webpage=$(mktemp) + trap 'rm -f "$webpage"' EXIT + + # Download web page containing table. + wget $url -O "$webpage" + + # Extract table from web page. + awk '/
/,/<\/pre>/' "$webpage" \
+	| sed 's/
//;s/<\/pre>//' \
+	| grep -E -v "^$" \
+	       > "$dictionary"
+
+    rm -f "$webpage"
+    trap "" EXIT
+}
+
+parse_dictionary ()
+{
+    # Parse dictionary.
+    mapfile -t words \
+	    < <(awk -F '->' '{print $1}' "$dictionary")
+    mapfile -t replacements \
+	    < <(awk -F '->' '{print $2}' "$dictionary")
+}
+
+# Print the names of those files among the arguments that contain at least
+# one entry of the WORDS array, delimited on each side by a separator from
+# $grep_separator or by a line boundary.  Prints nothing when no files are
+# given.
+find_files_matching_words ()
+{
+    # Without file operands grep would read standard input and block.
+    if [ $# -eq 0 ]; then
+	return
+    fi
+
+    # Join all words into one alternation: (word1|word2|...).  IFS is
+    # local, so the change is undone when the function returns.
+    local IFS='|'
+    local pat
+    pat="(${words[*]})"
+
+    local sep
+    sep=$grep_separator
+
+    pat="(^|$sep)$pat($sep|$)"
+
+    grep -E \
+	-l \
+	"$pat" \
+	"$@"
+}
+
+# Print the names of the files, given as the arguments after $1, that
+# contain the extended regexp $1 delimited on each side by a separator from
+# $grep_separator or by a line boundary.
+find_files_matching_word ()
+{
+    local needle=$1
+    shift
+
+    local delim=$grep_separator
+    local regex="(^|$delim)$needle($delim|$)"
+
+    grep -El "$regex" "$@"
+}
+
+# Replace the occurrences of word $1 by replacement $2 in file $3, editing
+# the file in place with sed.  An occurrence only counts when it is
+# delimited on each side by a separator from $sed_separator or by a line
+# boundary; the delimiting separators are preserved.
+replace_word_in_file ()
+{
+    local word
+    word="$1"
+
+    local replacement
+    replacement="$2"
+
+    local file
+    file="$3"
+
+    local sep
+    sep=$sed_separator
+
+    # Save separator.
+    sep="\($sep\)"
+
+    local repl1 repl2 repl3 repl4
+
+    # Word between two separators.
+    repl1="s%$sep$word$sep%\1$replacement\2%g"
+
+    # Word at the start of a line.
+    repl2="s%^$word$sep%$replacement\1%"
+
+    # Word at the end of a line.
+    repl3="s%$sep$word$%\1$replacement%"
+
+    # Word making up the whole line.
+    repl4="s%^$word$%$replacement%"
+
+    # repl1 is applied twice: a match consumes its trailing separator, so
+    # with "w w w" one pass skips every other occurrence; the second pass
+    # picks up the ones that were skipped.
+    sed -i \
+	"$repl1;$repl1;$repl2;$repl3;$repl4" \
+	"$file"
+}
+
+replace_word_in_files ()
+{
+    local word
+    word="$1"
+
+    local replacement
+    replacement="$2"
+
+    shift 2
+
+    local id
+    id="$word -> $replacement"
+
+    # Reduce set of files for sed to operate on.
+    local files_matching_word
+    declare -a files_matching_word
+    mapfile -t files_matching_word \
+	    < <(find_files_matching_word "$word" "$@")
+
+    if [ ${#files_matching_word[@]} -eq 0 ]; then
+	return
+    fi
+
+    if echo "$replacement"| grep -q ","; then
+	echo "TODO: $id"
+	return
+    fi
+
+    declare -A md5sums
+
+    local changed f before after
+    changed=false
+    for f in "${files_matching_word[@]}"; do
+	if [ "${md5sums[$f]}" = "" ]; then
+	    md5sums[$f]=$(md5sum "$f")
+	fi
+
+	before="${md5sums[$f]}"
+
+	replace_word_in_file \
+	    "$word" \
+	    "$replacement" \
+	    "$f"
+
+	after=$(md5sum "$f")
+
+	if [ "$after" != "$before" ]; then
+	    md5sums[$f]="$after"
+	    changed=true
+	fi
+    done
+
+    if $changed; then
+	echo "$id"
+    fi
+
+    find_files_matching_word "$word" "${files_matching_word[@]}" \
+	| awk "{ printf \"TODO: $id: replacement failed: %s\n\", \$0}"
+}
+
+# Entry point.  Collects the files named by the arguments, makes sure the
+# dictionary is available, loads it, narrows the file list down to the
+# files containing any dictionary word, and then processes the dictionary
+# entries one by one on that reduced list.
+main ()
+{
+    # Filled in by parse_args.
+    local -a unique_files
+    parse_args "$@"
+
+    get_dictionary
+
+    # Filled in by parse_dictionary.
+    local -a words replacements
+    parse_dictionary
+
+    # Reduce set of files for sed to operate on.
+    local -a files_matching_words
+    mapfile -t files_matching_words \
+	    < <(find_files_matching_words "${unique_files[@]}")
+
+    if [ ${#files_matching_words[@]} -eq 0 ]; then
+	return
+    fi
+
+    # Walk both arrays in step by index.
+    local idx
+    for idx in "${!words[@]}"; do
+	replace_word_in_files \
+	    "${words[$idx]}" \
+	    "${replacements[$idx]}" \
+	    "${files_matching_words[@]}"
+    done
+}
+
+main "$@"