[thirdparty/man-pages.git] / scripts / add_parens_for_own_funcs.sh

#!/bin/sh
#
# add_parens_for_own_funcs.sh
#
# This script is designed to fix inconsistencies in the use of
# parentheses after function names in the manual pages.
# It changes manual pages to add these parentheses.
# The problem is how to determine what is a "function name".
# The approach this script takes is the following:
#
#   For each manual page named in the command line that contains 
#           more than one line (i.e., skip man-page link files)
#       Create a set of names taken from the .SH NAME section of the
#               page and from grepping all pages named on the command
#               line for names that have .so links to this page
#       For each name obtained above
#           If we can find something that looks like a prototype on 
#                   the page, then
#               Try to substitute instances of that name on the page.
#                   (instances are considered to be words formatted
#		    using ^.B, ^.BR or \fB...\f[PR] -- this script
#		    ignores unformatted instances of function names.)
#           fi
#       done
#   done
#
# The rationale of the above is that the most likely function names
# that appear on a page are those that the manual page is describing.
# It doesn't fix everything, but it catches many instances.
# The rest will have to be done manually.
#
# This script is rather verbose because it provides a computer-assisted
# solution, rather than one that is fully automated.  When running it,
# pipe the output through
#
#            ...  2>&1 | less
#
# and take a good look at the output.  In particular, you can scan
# the output for *possible* problems by looking for the pattern: /^%%%/
# The script's output should be enough to help you determine if the 
# problem is real or not.
#
# Suggested usage (in this case to fix pages in Section 2):
#
#     cd man2
#     sh add_parens_for_own_funcs.sh *.2 2>&1 | tee changes.log | less
#
# Use the "-n" option for a dry run, in order to see what would be
# done, without actually doing it.
#
# (And, yes, there are many ways that this script could probably be 
# made to work faster...)
#
######################################################################
#
# (C) Copyright 2005 & 2013, Michael Kerrisk
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details
# (http://www.gnu.org/licenses/gpl-2.0.html).
#
#
# 

# Scratch files are created in the current directory and are named
# after this script.  The name is derived with a parameter expansion
# rather than an unquoted "basename $0", so a script path containing
# whitespace no longer breaks the file names.

file_base="tmp.${0##*/}"

work_dst_file="$file_base.dst"
work_src_file="$file_base.src"

matches_for_all_names="$file_base.all_match"
matches_for_this_name="$file_base.this_match"

# Whitespace-separated list of all scratch files; the clean-up code at
# the end of the script expands it unquoted.
all_files="$work_dst_file $work_src_file $matches_for_all_names \
	   $matches_for_this_name"

# remove_work_files: delete the four scratch files.
# Files that do not exist are silently ignored (rm -f).
remove_work_files() {
    rm -f "$work_dst_file" "$work_src_file" \
        "$matches_for_all_names" "$matches_for_this_name"
}

# Also clean up when the script ends early: the EXIT trap covers every
# "exit", and turning HUP/INT/TERM into an exit makes it fire on an
# interrupted run as well.
trap remove_work_files EXIT
trap 'exit 1' HUP INT TERM

# Start from a clean slate in case an earlier run left files behind
remove_work_files

# Command-line option processing
#
#   -n    dry run: report what would be changed, but leave the
#         pages themselves untouched
#
# Sets really_do_it (1 = rewrite pages, 0 = dry run) and shifts the
# parsed options away, so that "$@" holds only the page names.

really_do_it=1

# The leading ':' in the option string selects getopts' silent mode.
# Without it, OPTARG is unset for an unrecognized option and the
# message below would print an empty option name.
while getopts ":n" optname; do
    case "$optname" in
    n)
        really_do_it=0
        ;;
    *)
        echo "Unknown option: -$OPTARG" >&2
        exit 1
        ;;
    esac
done

shift $((OPTIND - 1))

# Main loop: process each manual page named on the command line.
#
# Reads:   "$@" (the page files), $really_do_it, and the scratch-file
#          names $work_dst_file, $work_src_file, $matches_for_all_names
#          and $matches_for_this_name.
# Outputs: progress and diagnostics on stdout (the page banner is also
#          written to stderr); lines starting with "%%%" flag possible
#          problems.  A changed page is rewritten in place unless
#          $really_do_it is 0.

if [ "$#" -eq 0 ]; then
    # Without this check nothing at all would be printed
    echo "Usage: ${0##*/} [-n] page..." >&2
fi

# The name lists below are deliberately expanded unquoted so that they
# are split into words.  Switch off pathname expansion while doing so:
# a few names contain characters such as '*' or '[' that must not be
# treated as glob patterns.
set -f

for page in "$@"; do

    # Only process readable regular files with > 1 line -- single-line
    # files are link files

    if [ ! -f "$page" ] || [ ! -r "$page" ]; then
        continue
    fi
    if [ "$(( $(wc -l < "$page") ))" -le 1 ]; then
        continue
    fi

    banner=">>>>>>>>>>>>>>>>>>>>>>>>> $page <<<<<<<<<<<<<<<<<<<<<<<<<"
    printf '%s\n' "$banner"
    printf '%s\n' "$banner" >&2

    # Extract names that follow the ".SH NAME" directive -- these will
    # be our guesses about function names to look for.  awk joins the
    # lines between ".SH NAME" and the next .SH directive into a single
    # line; sed then drops the "\- description" part and turns the
    # comma-separated list into a space-separated one.

    sh_nlist=$(awk '
            /^\.SH NAME/ { in_name = 1; next }
            /^\.SH/      { in_name = 0 }    # Stop at the next .SH directive
            in_name      { printf "%s ", $0 }
            END          { print "" }
        ' "$page" | sed -e 's/ *\\-.*//' -e 's/, */ /g')
    echo "### .SH name list:" $sh_nlist

    # Some pages like msgop.2 don't actually list the function names in
    # the .SH section -- but we can try using link pages to give us
    # another guess at the right function names to look for: every
    # page on the command line that contains a .so link to this page
    # contributes its own name (minus the section suffix)

    page_base=${page%.[1-8]}
    so_nlist=$(grep -l "^\\.so.*/$page_base\\." "$@" | \
        sed -e 's/\.[1-8]$//')

    echo "### .so name list:" $so_nlist

    # Combine the two lists, eliminate duplicates

    nlist=$(printf '%s\n' $sh_nlist $so_nlist | sort -u)

    maybechanged=0

    cp "$page" "$work_dst_file"
    : > "$matches_for_all_names"    # start empty, but always existing

    for rname in $nlist; do     # try each name from our list for this page

        # A very few names in .SH sections contain regexp characters!
        # ('+' needs no escaping: it is an ordinary character in a
        # basic regular expression, whereas "\+" is a repetition
        # operator for GNU grep and sed.)

        name=$(printf '%s\n' "$rname" | \
            sed -e 's/\*/\\*/g' -e 's/\./\\./g' -e 's/\[/\\[/g')

        echo "########## trying $rname ##########"

        # Record the lines we expect to change for this name.  The
        # four patterns mirror the four sed substitutions further down:
        #     '.B name$'  /  '.BR name$'
        #     '.BR name [^("]*$'
        #     '\fBname\f[PR][ .,;:]'
        #     '\fBname\f[PR]$'

        grep -e "^\\.BR* $name *\$" \
             -e "^\\.BR *$name [^(\"]*\$" \
             -e '\\fB'"$name"'\\f[PR][ .,;:]' \
             -e '\\fB'"$name"'\\f[PR]$' \
             "$page" > "$matches_for_this_name"

        sed -e 's/^/### MATCH: /' "$matches_for_this_name"
        cat "$matches_for_this_name" >> "$matches_for_all_names"

        # Only process a page if we can see something that looks
        # like a function prototype for this name in the page

        if grep -q -e "$name *(" -e "$name"'\\f.[\ ]*(' "$page"; then

            # 1. '.B name$' and '.BR name$'
            # 2. '.BR name [^("]*$'
            #    (The use of [^"] in the above eliminates lines
            #    like: .BR func " and " func
            #    Those lines better be done manually.)
            # 3. '\fBname\fP[ .,;:]' and '\fBname\fR[ .,;:]'
            # 4. '\fBname\fP$' and '\fBname\fR$'
            #
            # The matched name is re-inserted via a back-reference, so
            # the replacement text never has to be escaped.

            cp "$work_dst_file" "$work_src_file"
            sed \
                -e 's/^\.BR* \('"$name"'\) *$/.BR \1 ()/' \
                -e 's/^\.BR *\('"$name"'\) \([^("]*\)$/.BR \1 ()\2/' \
                -e 's/\(\\fB'"$name"'\\f[PR]\)\([ .,;:]\)/\1()\2/g' \
                -e 's/\(\\fB'"$name"'\\f[PR]\)$/\1()/g' \
                "$work_src_file" > "$work_dst_file"

            maybechanged=1
        else
            echo "%%%%%%%%%% WARNING: NO PROTOTYPE MATCHES FOR: $name"
        fi
    done

    # If the file was changed, then:
    # show "diff -u" output to user;
    # and count number of changed lines and compare it with what
    # we expected, displaying a warning if it wasn't what was expected

    if [ "$maybechanged" -ne 0 ] && ! cmp -s "$page" "$work_dst_file"; then
        diff -u "$page" "$work_dst_file"

        # Count the added lines; the "+++" file header is excluded
        made_matches=$(diff -U 0 "$page" "$work_dst_file" | \
            grep -c '^+[^+]')

        # Write the changes back, unless this is a dry run (-n).
        # "cat >" rather than "mv", so the page file itself is kept.

        if [ "$really_do_it" -ne 0 ]; then
            cat "$work_dst_file" > "$page"
        fi

    else
        echo "### NOTHING CHANGED"
        made_matches=0
    fi

    # A line matched for several names must only be counted once
    min_match=$(( $(sort -u "$matches_for_all_names" | wc -l) ))

    echo "### Expected matches >= $min_match"
    echo "### Made matches $made_matches"

    if [ "$made_matches" -lt "$min_match" ]; then
        echo "%%%%%%%%%% WARNING: NOT ENOUGH MATCHES:  $made_matches < $min_match"
    fi

done

set +f

# Remove each scratch file named in $all_files (the list is split on
# whitespace), then finish with a success status.

for scratch_file in $all_files; do
    rm -f "$scratch_file"
done

exit 0
Commit	Line	Data
f8fc5a23 MK	1	#!/bin/sh
	2	#
	3	# add_parens_for_own_funcs.sh
	4	#
	5	# This script is designed to fix inconsistencies in the use of
	6	# parentheses after function names in the manual pages.
	7	# It changes manual pages to add these parentheses.
	8	# The problem is how to determine what is a "function name".
	9	# The approach this script takes is the following:
	10	#
	11	# For each manual page named in the command line that contains
	12	# more than one line (i.e., skip man-page link files)
	13	# Create a set of names taken from the .SH section of the
	14	# page and from grepping all pages for names that
	15	# have .so links to this page
	16	# For each name obtained above
	17	# If we can find something that looks like a prototype on
	18	# the page, then
	19	# Try to substitute instances of that name on the page.
	20	# (instances are considered to be words formatted
	21	# using ^.[BI] or \f[BI]...\f[PR] -- this script
3511dcdb	22	# ignores unformatted instances of function names.)
f8fc5a23 MK	23	# fi
	24	# done
	25	# done
	26	#
	27	# The rationale of the above is that the most likely function names
	28	# that appear on a page are those that the manual page is describing.
	29	# It doesn't fix everything, but it catches many instances.
	30	# The rest will have to be done manually.
	31	#
	32	# This script is rather verbose because it provides a computer-assisted
	33	# solution, rather than one that is fully automated. When running it,
	34	# pipe the output through
	35	#
	36	# ... 2>&1 \| less
	37	#
	38	# and take a good look at the output. In particular, you can scan
	39	# the output for possible problems by looking for the pattern: /^%%%/
	40	# The script's output should be enough to help you determine if the
	41	# problem is real or not.
	42	#
	43	# Suggested usage (in this case to fix pages in Section 2):
	44	#
	45	# cd man2
	46	# sh add_parens_for_own_funcs.sh *.2 2>&1 \| tee changes.log \| less
	47	#
	48	# Use the "-n" option for a dry run, in order to see what would be
	49	# done, without actually doing it.
	50	#
	51	# (And, yes, there are many ways that this script could probably be
	52	# made to work faster...)
	53	#
	54	######################################################################
	55	#
404b6a03 MK	56	# (C) Copyright 2005 & 2013, Michael Kerrisk
	57	# This program is free software; you can redistribute it and/or
	58	# modify it under the terms of the GNU General Public License
	59	# as published by the Free Software Foundation; either version 2
	60	# of the License, or (at your option) any later version.
	61	#
	62	# This program is distributed in the hope that it will be useful,
	63	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	64	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	65	# GNU General Public License for more details
	66	# (http://www.gnu.org/licenses/gpl-2.0.html).
	67	#
	68	#
f8fc5a23 MK	69	#
	70
	71	file_base="tmp.$(basename $0)"
	72
	73	work_dst_file="$file_base.dst"
	74	work_src_file="$file_base.src"
	75
	76	matches_for_all_names="$file_base.all_match"
	77	matches_for_this_name="$file_base.this_match"
	78
	79	all_files="$work_dst_file $work_src_file $matches_for_all_names \
	80	$matches_for_this_name"
	81
	82	rm -f $all_files
	83
	84	# Command-line option processing
	85
	86	really_do_it=1
	87	while getopts "n" optname; do
	88	case "$optname" in
	89	n) really_do_it=0;
	90	;;
	91	*) echo "Unknown option: $OPTARG"
	92	exit 1
	93	;;
	94	esac
	95	done
	96
3511dcdb	97	shift $(( $OPTIND - 1 ))
f8fc5a23 MK	98
	99	# Only process files with > 1 line -- single-line files are link files
	100
3511dcdb	101	for page in $(wc "$@" 2> /dev/null \| awk '$1 > 1 {print $4}'\| \
f8fc5a23 MK	102	grep -v '^total'); do
	103
	104	echo ">>>>>>>>>>>>>>>>>>>>>>>>>" $page "<<<<<<<<<<<<<<<<<<<<<<<<<"
	105	echo ">>>>>>>>>>>>>>>>>>>>>>>>>" $page "<<<<<<<<<<<<<<<<<<<<<<<<<" 1>&2
	106
	107	# Extract names that follow the ".SH NAME" directive -- these will
	108	# be our guesses about function names to look for
	109
	110	sh_nlist=$(cat $page \| \
	111	awk 'BEGIN { p = 0 }
3511dcdb MK	112	/^\.SH NAME/ { p = NR }
	113	/^.SH/ && NR > p { p = 0 } # Stop at the next .SH directive
	114	p > 0 && NR > p { print $0 } # These are the lines between
	115	# the two .SH directives
f8fc5a23 MK	116	')
	117	sh_nlist=$(echo $sh_nlist \| sed -e 's/ \\-.//' -e 's/, */ /g')
	118	echo "### .SH name list:" $sh_nlist
	119
	120	# Some pages like msgop.2 don't actually list the function names in
	121	# the .SH section -- but we can try using link pages to give us
	122	# another guess at the right function names to look for
	123
	124	so_nlist=$(grep -l "^\\.so.*/$(echo $page\| \
	125	sed -e 's/\.[1-8]$//')\\." $* \| \
	126	sed -e 's/\.[1-8]$//g')
	127
	128	echo "### .so name list:" $so_nlist
	129
	130	# Combine the two lists, eliminate duplicates
	131
	132	nlist=$(echo $sh_nlist $so_nlist \| tr ' ' '\012' \| sort -u)
	133
	134	maybechanged=0
	135
	136	cp $page $work_dst_file
	137	rm -f $matches_for_all_names; # touch $matches_for_all_names
	138
	139	for rname in $nlist; do # try each name from out list for this page
	140
	141	# A very few names in .SH sections contain regexp characters!
	142
	143	name=$(echo $rname \| sed -e 's/\/\\/g' -e 's/\./\\./g' \
	144	-e 's/\[/\\[/g' -e 's/\+/\\+/g')
	145
	146	echo "########## trying $rname ##########"
	147
	148	rm -f $matches_for_this_name
	149
	150	grep "^.BR* $name *$" $page \| \
	151	>> $matches_for_this_name
	152	grep "^.BR $name [^(\"]$" $page \| \
	153	>> $matches_for_this_name
	154	grep '\\fB'"$name"'\\f[PR][ .,;:]' $page \| \
	155	>> $matches_for_this_name
	156	grep '\\fB'"$name"'\\f[PR]$' $page \| \
	157	>> $matches_for_this_name
	158
	159	cat $matches_for_this_name \| sed -e 's/^/### MATCH: /'
	160	cat $matches_for_this_name >> $matches_for_all_names
	161
	162	# Only process a page if we can see something that looks
	163	# like a function prototype for this name in the page
	164
	165	if grep -q "$name *(" $page \|\| \
	166	grep -q "$name\\\\f.[\\ ]*(" $page; then
	167
	168	# '.B name$'
	169	# '.BR name [^("]*$
	170	# (The use of [^"] in the above eliminates lines
	171	# like: .BR func " and " func
	172	# Those lines better be done manually.)
	173	cp $work_dst_file $work_src_file
	174	cat $work_src_file \| \
	175	sed \
	176	-e "s/^.BR* $name *\$/.BR $name ()/" \
	177	-e "/^.BR $name [^(\"]\$/s/^.BR *$name /.BR $name ()/" \
	178	> $work_dst_file
	179
180	# '\fBname\fP[ .,;:]'
181	# '\fBname\fP$'
182	cp $work_dst_file $work_src_file
183	cat $work_src_file \| \
184	sed \
185	-e 's/\\fB'$name'\\fP /\\fB'$name'\\fP() /g' \
186	-e 's/\\fB'$name'\\fP\./\\fB'$name'\\fP()./g' \
187	-e 's/\\fB'$name'\\fP,/\\fB'$name'\\fP(),/g' \
188	-e 's/\\fB'$name'\\fP;/\\fB'$name'\\fP();/g' \
189	-e 's/\\fB'$name'\\fP:/\\fB'$name'\\fP():/g' \
190	-e 's/\\fB'$name'\\fP$/\\fB'$name'\\fP()/g' \
191	> $work_dst_file
192
193	# '\fBname\fR[ .,;:]'
194	# '\fBname\fR$'
195	cp $work_dst_file $work_src_file
196	cat $work_src_file \| \
197	sed \
198	-e 's/\\fB'$name'\\fR /\\fB'$name'\\fR() /g' \
199	-e 's/\\fB'$name'\\fR\./\\fB'$name'\\fR()./g' \
200	-e 's/\\fB'$name'\\fR,/\\fB'$name'\\fR(),/g' \
201	-e 's/\\fB'$name'\\fR;/\\fB'$name'\\fR();/g' \
202	-e 's/\\fB'$name'\\fR:/\\fB'$name'\\fR():/g' \
203	-e 's/\\fB'$name'\\fR$/\\fB'$name'\\fR()/g' \
204	> $work_dst_file
205
206	maybechanged=1
207	else
208	echo "%%%%%%%%%% WARNING: NO PROTOTYPE MATCHES FOR: $name"
209	fi
210	done
211
212	# If the file was changed, then:
213	# show "diff -U" output to user;
214	# and count number of changed lines and compare it with what
215	# we expected, displaying a warning if it wasn't what was expected
216
217	if test $maybechanged -ne 0 && ! cmp -s $page $work_dst_file; then
218	diff -u $page $work_dst_file
219
220	made_matches=$(diff -U 0 $page $work_dst_file \| grep '^\+[^+]' \| \
221	wc -l \| awk '{print $1}')
222
223	# The following line makes the changes -- comment it out if you
224	# just want to do a dry run to see what changes would be made.
225
226	if test $really_do_it -ne 0; then
227	cat $work_dst_file > $page
228	fi
229
230	else
231	echo "### NOTHING CHANGED"
232	made_matches=0
233	fi
234
235	min_match=$(cat $matches_for_all_names \| \
236	sort -u \| wc -l \| awk '{print $1}')
237
238	echo "### Expected matches >= $min_match"
239	echo "### Made matches $made_matches"
240
241	if test $made_matches -lt $min_match; then
242	echo "%%%%%%%%%% WARNING: NOT ENOUGH MATCHES: " \
243	"$made_matches < $min_match"
244	fi
245
246	done
247
248	# clean up
249
250	rm -f $all_files
251	exit 0