#
# SPDX-License-Identifier: curl
#
-# Input: a cmdline docs markdown, it gets modified *in place*
+# Input: one or more docs markdown files; each gets modified *in place*
#
-# The main purpose is to strip off the leading meta-data part, but also to
-# clean up whatever else the spell checker might have a problem with that we
-# still deem is fine.
-
-my $header = 1;
-while(1) {
- # set this if the markdown has no meta-data header to skip
- if($ARGV[0] eq "--no-header") {
- shift @ARGV;
- $header = 0;
+# Strip off the leading meta-data/header part, remove all known curl symbols
+# and long command line options. Also clean up whatever else the spell checker
+# might have a problem with that we still deem is fine.
+#
+
+open(S, "<./docs/libcurl/symbols-in-versions")
+ || die "can't find symbols-in-versions";
+while(<S>) {
+ if(/^([^ ]*) /) {
+ push @asyms, $1;
+ }
+}
+close(S);
+
+# init the opts table with "special" options not easy to figure out
+my @aopts = (
+ '--ftp-ssl-reqd', # old alias
+ );
+
+open(O, "<./docs/options-in-versions")
+ || die "can't find options-in-versions";
+while(<O>) {
+ chomp;
+ if(/^([^ ]+)/) {
+ my $o = $1;
+ push @aopts, $o;
+ if($o =~ /^--no-(.*)/) {
+ # for the --no options, also make one without it
+ push @aopts, "--$1";
+ }
+ elsif($o =~ /^--disable-(.*)/) {
+ # for the --disable options, also make the special ones
+ push @aopts, "--$1";
+ push @aopts, "--no-$1";
+ }
+ }
+}
+close(O);
+
+open(C, "<./.github/scripts/spellcheck.curl")
+ || die "can't find spellcheck.curl";
+while(<C>) {
+ if(/^\#/) {
+ next;
}
- else {
- last;
+ chomp;
+ if(/^([^ ]+)/) {
+ push @asyms, $1;
}
}
+close(C);
-my $f = $ARGV[0];
+# longest symbols first
+my @syms = sort { length($b) <=> length($a) } @asyms;
-open(F, "<$f") or die;
+# longest cmdline options first
+my @opts = sort { length($b) <=> length($a) } @aopts;
-my $ignore = $header;
-my $sepcount = 0;
-my @out;
-while(<F>) {
- if(/^---/ && $header) {
- if(++$sepcount == 2) {
+sub process {
+ my ($f) = @_;
+
+ my $ignore = 0;
+ my $sepcount = 0;
+ my $out;
+ my $line = 0;
+ open(F, "<$f") or die;
+
+ while(<F>) {
+ $line++;
+ if(/^---/ && ($line == 1)) {
+ $ignore = 1;
+ next;
+ }
+ elsif(/^---/ && $ignore) {
$ignore = 0;
+ next;
}
- next;
- }
- next if($ignore);
+ next if($ignore);
+
+ my $l = $_;
- # strip out backticked words
- $_ =~ s/`[^`]+`//g;
+ # strip out backticked words
+ $l =~ s/`[^`]+`//g;
- # strip out all long command line options
- $_ =~ s/--[a-z0-9-]+//g;
+ # **bold**
+ $l =~ s/\*\*(\S.*?)\*\*//g;
+ # *italics*
+ $l =~ s/\*(\S.*?)\*//g;
- # strip out https URLs, we don't want them spellchecked
- $_ =~ s!https://[a-z0-9\#_/.-]+!!gi;
+ # strip out https URLs, we don't want them spellchecked
+ $l =~ s!https://[a-z0-9\#_/.-]+!!gi;
- push @out, $_;
+ $out .= $l;
+ }
+ close(F);
+
+ # cut out all known curl cmdline options; \Q...\E makes any regex
+ # metacharacters in a name (such as '.') match literally instead of
+ # acting as wildcards that could strip unrelated text
+ for my $opt (@opts) {
+   $out =~ s/\Q$opt\E//g;
+ }
+
+ # cut out all known curl symbols, as whole words only (\b anchors),
+ # again quoting the name so it is matched as a literal string
+ for my $sym (@syms) {
+   $out =~ s/\b\Q$sym\E\b//g;
+ }
+
+ if(!$ignore) {
+ open(O, ">$f") or die;
+ print O $out;
+ close(O);
+ }
}
-close(F);
-if(!$ignore) {
- open(O, ">$f") or die;
- print O @out;
- close(O);
+for my $f (@ARGV) {
+ process($f);
}
+++ /dev/null
-#!/usr/bin/env perl
-# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
-#
-# SPDX-License-Identifier: curl
-#
-# Given: a libcurl curldown man page
-# Outputs: the same file, minus the SYNOPSIS and the EXAMPLE sections
-#
-
-my $f = $ARGV[0];
-
-open(F, "<$f") or die;
-
-my @out;
-my $ignore = 0;
-while(<F>) {
- if($_ =~ /^# (SYNOPSIS|EXAMPLE)/) {
- $ignore = 1;
- }
- elsif($ignore && ($_ =~ /^# [A-Z]/)) {
- $ignore = 0;
- }
- elsif(!$ignore) {
- # **bold**
- $_ =~ s/\*\*(\S.*?)\*\*//g;
- # *italics*
- $_ =~ s/\*(\S.*?)\*//g;
-
- $_ =~ s/CURL(M|SH|U|H)code//g;
- $_ =~ s/CURL_[A-Z0-9_]*//g;
- $_ =~ s/CURLALTSVC_[A-Z0-9_]*//g;
- $_ =~ s/CURLAUTH_[A-Z0-9_]*//g;
- $_ =~ s/CURLE_[A-Z0-9_]*//g;
- $_ =~ s/CURLFORM_[A-Z0-9_]*//g;
- $_ =~ s/CURLFTP_[A-Z0-9_]*//g;
- $_ =~ s/CURLFTPAUTH_[A-Z0-9_]*//g;
- $_ =~ s/CURLFTPMETHOD_[A-Z0-9_]*//g;
- $_ =~ s/CURLFTPSSL_[A-Z0-9_]*//g;
- $_ =~ s/CURLGSSAPI_[A-Z0-9_]*//g;
- $_ =~ s/CURLHEADER_[A-Z0-9_]*//g;
- $_ =~ s/CURLINFO_[A-Z0-9_]*//g;
- $_ =~ s/CURLM_[A-Z0-9_]*//g;
- $_ =~ s/CURLMIMEOPT_[A-Z0-9_]*//g;
- $_ =~ s/CURLMOPT_[A-Z0-9_]*//g;
- $_ =~ s/CURLOPT_[A-Z0-9_]*//g;
- $_ =~ s/CURLPIPE_[A-Z0-9_]*//g;
- $_ =~ s/CURLPROTO_[A-Z0-9_]*//g;
- $_ =~ s/CURLPROXY_[A-Z0-9_]*//g;
- $_ =~ s/CURLPX_[A-Z0-9_]*//g;
- $_ =~ s/CURLSHE_[A-Z0-9_]*//g;
- $_ =~ s/CURLSHOPT_[A-Z0-9_]*//g;
- $_ =~ s/CURLSSLOPT_[A-Z0-9_]*//g;
- $_ =~ s/CURLSSH_[A-Z0-9_]*//g;
- $_ =~ s/CURLSSLBACKEND_[A-Z0-9_]*//g;
- $_ =~ s/CURLU_[A-Z0-9_]*//g;
- $_ =~ s/CURLUPART_[A-Z0-9_]*//g;
- #$_ =~ s/\bCURLU\b//g; # stand-alone CURLU
- $_ =~ s/CURLUE_[A-Z0-9_]*//g;
- $_ =~ s/CURLHE_[A-Z0-9_]*//g;
- $_ =~ s/CURLWS_[A-Z0-9_]*//g;
- $_ =~ s/CURLKH[A-Z0-9_]*//g;
- $_ =~ s/CURLUPART_[A-Z0-9_]*//g;
- $_ =~ s/CURLUSESSL_[A-Z0-9_]*//g;
- $_ =~ s/CURLPAUSE_[A-Z0-9_]*//g;
- $_ =~ s/CURLHSTS_[A-Z0-9_]*//g;
- $_ =~ s/curl_global_([a-z_]*)//g;
- $_ =~ s/curl_(strequal|strnequal|formadd|waitfd|formget|getdate|formfree)//g;
- $_ =~ s/curl_easy_([a-z]*)//g;
- $_ =~ s/curl_multi_([a-z_]*)//g;
- $_ =~ s/curl_mime_(subparts|addpart|filedata|data_cb)//g;
- $_ =~ s/curl_ws_(send|recv|meta)//g;
- $_ =~ s/curl_url_(dup)//g;
- $_ =~ s/curl_pushheader_by(name|num)//g;
- $_ =~ s/libcurl-(env|ws)//g;
- $_ =~ s/libcurl\\-(env|ws)//g;
- $_ =~ s/(^|\W)((tftp|https|http|ftp):\/\/[a-z0-9\-._~%:\/?\#\[\]\@!\$&'()*+,;=\\]+)//gi;
- push @out, $_;
- }
-}
-close(F);
-
-open(O, ">$f") or die;
-for my $l (@out) {
- print O $l;
-}
-close(O);
--- /dev/null
+# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+#
+# SPDX-License-Identifier: curl
+#
+# common variable types + structs
+# callback typedefs
+# public function names
+# some man page names
+curl_fileinfo
+curl_forms
+curl_hstsentry
+curl_httppost
+curl_index
+curl_khkey
+curl_pushheaders
+curl_waitfd
+CURLcode
+CURLformoption
+CURLHcode
+CURLMcode
+CURLMsg
+CURLSHcode
+CURLUcode
+curl_calloc_callback
+curl_chunk_bgn_callback
+curl_chunk_end_callback
+curl_conv_callback
+curl_debug_callback
+curl_fnmatch_callback
+curl_formget_callback
+curl_free_callback
+curl_hstsread_callback
+curl_hstswrite_callback
+curl_ioctl_callback
+curl_malloc_callback
+curl_multi_timer_callback
+curl_opensocket_callback
+curl_prereq_callback
+curl_progress_callback
+curl_push_callback
+curl_read_callback
+curl_realloc_callback
+curl_resolver_start_callback
+curl_seek_callback
+curl_socket_callback
+curl_sockopt_callback
+curl_ssl_ctx_callback
+curl_strdup_callback
+curl_trailer_callback
+curl_write_callback
+curl_xferinfo_callback
+curl_strequal
+curl_strnequal
+curl_mime_init
+curl_mime_free
+curl_mime_addpart
+curl_mime_name
+curl_mime_filename
+curl_mime_type
+curl_mime_encoder
+curl_mime_data
+curl_mime_filedata
+curl_mime_data_cb
+curl_mime_subparts
+curl_mime_headers
+curl_formadd
+curl_formget
+curl_formfree
+curl_getdate
+curl_getenv
+curl_version
+curl_easy_escape
+curl_escape
+curl_easy_unescape
+curl_unescape
+curl_free
+curl_global_init
+curl_global_init_mem
+curl_global_cleanup
+curl_global_trace
+curl_global_sslset
+curl_slist_append
+curl_slist_free_all
+curl_getdate
+curl_share_init
+curl_share_setopt
+curl_share_cleanup
+curl_version_info
+curl_easy_strerror
+curl_share_strerror
+curl_easy_pause
+curl_easy_ssls_import
+curl_easy_ssls_export
+curl_easy_init
+curl_easy_setopt
+curl_easy_perform
+curl_easy_cleanup
+curl_easy_getinfo
+curl_easy_duphandle
+curl_easy_reset
+curl_easy_recv
+curl_easy_send
+curl_easy_upkeep
+curl_easy_header
+curl_easy_nextheader
+curl_mprintf
+curl_mfprintf
+curl_msprintf
+curl_msnprintf
+curl_mvprintf
+curl_mvfprintf
+curl_mvsprintf
+curl_mvsnprintf
+curl_maprintf
+curl_mvaprintf
+curl_multi_init
+curl_multi_add_handle
+curl_multi_remove_handle
+curl_multi_fdset
+curl_multi_waitfds
+curl_multi_wait
+curl_multi_poll
+curl_multi_wakeup
+curl_multi_perform
+curl_multi_cleanup
+curl_multi_info_read
+curl_multi_strerror
+curl_multi_socket
+curl_multi_socket_action
+curl_multi_socket_all
+curl_multi_timeout
+curl_multi_setopt
+curl_multi_assign
+curl_multi_get_handles
+curl_pushheader_bynum
+curl_pushheader_byname
+curl_multi_waitfds
+curl_easy_option_by_name
+curl_easy_option_by_id
+curl_easy_option_next
+curl_url
+curl_url_cleanup
+curl_url_dup
+curl_url_get
+curl_url_set
+curl_url_strerror
+curl_ws_recv
+curl_ws_send
+curl_ws_meta
+libcurl-env
+libcurl-ws
persist-credentials: false
name: checkout
- - name: trim all man page *.md files
- run: find docs -name "*.md" ! -name "_*" -print0 | xargs -0 -n1 .github/scripts/cleancmd.pl
-
- - name: trim libcurl man page *.md files
- run: find docs/libcurl \( -name "curl_*.md" -o -name "libcurl*.md" \) -print0 | xargs -0 -n1 .github/scripts/cleanspell.pl
-
- - name: trim libcurl option man page *.md files
- run: find docs/libcurl/opts -name "CURL*.md" -print0 | xargs -0 -n1 .github/scripts/cleanspell.pl
-
- - name: trim cmdline docs markdown _*.md files
- run: find docs/cmdline-opts -name "_*.md" -print0 | xargs -0 -n1 .github/scripts/cleancmd.pl --no-header
-
- - name: trim docs/ markdown _*.md files
- run: git ls-files docs/*.md docs/internals/*.md | xargs -n1 .github/scripts/cleancmd.pl --no-header
+ - name: trim all *.md files in docs/
+ run: .github/scripts/cleancmd.pl $(find docs -name "*.md")
- name: setup the custom wordlist
run: grep -v '^#' .github/scripts/spellcheck.words > wordlist.txt