]> git.ipfire.org Git - thirdparty/man-pages.git/blob - scripts/find_repeated_words.sh
print_encoding.sh: Script to list pages that employ character sets other than us...
[thirdparty/man-pages.git] / scripts / find_repeated_words.sh
1 #!/bin/sh
2 #
3 # find_repeated_words.sh
4 #
5 # A simple script for finding instances of repeated consecutive words
6 # in manual pages -- human inspection can then determine if these
7 # are real errors in the text.
8 #
9 # Usage: sh find_repeated_words.sh [file...]
10 #
11 ######################################################################
12 #
13 # (C) Copyright 2007 & 2013, Michael Kerrisk
14 # This program is free software; you can redistribute it and/or
15 # modify it under the terms of the GNU General Public License
16 # as published by the Free Software Foundation; either version 2
17 # of the License, or (at your option) any later version.
18 #
19 # This program is distributed in the hope that it will be useful,
20 # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 # GNU General Public License for more details
23 # (http://www.gnu.org/licenses/gpl-2.0.html).
24 #
25 #
26
# repeated_words
#
# Filter: reads rendered manual-page text on standard input and writes,
# on a single line, every word that is immediately preceded by an
# identical word (each reported word is followed by one space).
# Line breaks in the input are ignored, so a word repeated across the
# end of one line and the start of the next is also reported.
# Only words containing at least one ASCII letter are reported, which
# suppresses repeated numbers and punctuation.
# Writes nothing when no repeated word is found.
repeated_words() {
    # Put each word on its own line.  TAB (\011) must be a separator
    # as well as space, because "col -b" without -x replaces runs of
    # spaces by tabs.  (tr has no '\008' escape: 8 is not an octal
    # digit, so that spelling would split words at every "8".)
    tr ' \011' '\012\012' |
        sed -e '/^$/d' |
        # Print a word whenever it equals the word on the previous line.
        awk 'BEGIN { prev = "" } { if (prev == $0) print prev; prev = $0 }' |
        grep '[a-zA-Z]' |
        # Join the hits into one space-separated line.
        tr '\012' ' '
}

for file in "$@" ; do
    # Render the page with a very large MANWIDTH (presumably so that
    # man does not wrap paragraphs), strip overstrike sequences with
    # "col -b", and collect the repeated words.  Diagnostics from man
    # are deliberately discarded: a file that cannot be formatted
    # simply yields no words.
    words=$(MANWIDTH=2000 man -l "$file" 2> /dev/null | col -b |
        repeated_words)
    if test -n "$words"; then
        # printf instead of echo: file names and words are arbitrary
        # data and must be printed literally.
        printf '%s: %s\n' "$file" "$words"
    fi
done