From 5b5e8a29c13aa583f8a674cb65f92a204da24be8 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Mon, 6 Oct 2025 07:53:31 -0400 Subject: [PATCH] Revert "Improve docs syntax checking" This reverts commit b292256272623d1a7532f3893a4565d1944742b4. Further discussion is needed Discussion: https://postgr.es/m/0198ec0f-0269-4cf4-b4a7-22c05b3047cb@eisentraut.org --- .cirrus.tasks.yml | 3 -- doc/src/sgml/Makefile | 18 ++++++-- doc/src/sgml/meson.build | 23 --------- doc/src/sgml/sgml_syntax_check.pl | 77 ------------------------------- 4 files changed, 15 insertions(+), 106 deletions(-) delete mode 100755 doc/src/sgml/sgml_syntax_check.pl diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml index 1c937247a9a..eca9d62fc22 100644 --- a/.cirrus.tasks.yml +++ b/.cirrus.tasks.yml @@ -627,8 +627,6 @@ task: TEST_JOBS: 8 IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma - XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml - CIRRUS_WORKING_DIR: ${HOME}/pgsql/ CCACHE_DIR: ${HOME}/ccache MACPORTS_CACHE: ${HOME}/macports-cache @@ -643,7 +641,6 @@ task: MACOS_PACKAGE_LIST: >- ccache - docbook-xml-4.5 icu kerberos5 lz4 diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile index 4d45b805da2..b53b2694a6b 100644 --- a/doc/src/sgml/Makefile +++ b/doc/src/sgml/Makefile @@ -124,7 +124,7 @@ ifeq ($(STYLE),website) XSLTPROC_HTML_FLAGS += --param website.stylesheet 1 endif -html: check html-stamp +html: html-stamp html-stamp: stylesheet.xsl postgres-full.xml $(ALL_IMAGES) $(XSLTPROC) $(XMLINCLUDE) $(XSLTPROCFLAGS) $(XSLTPROC_HTML_FLAGS) $(wordlist 1,2,$^) @@ -200,8 +200,8 @@ MAKEINFO = makeinfo ## # Quick syntax check without style processing -check: postgres.sgml $(ALL_SGML) - $(PERL) $(srcdir)/sgml_syntax_check.pl --xmllint "$(XMLLINT)" --srcdir $(srcdir) +check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp + $(XMLLINT) $(XMLINCLUDE) --noout --valid $< ## @@ -261,6 +261,18 @@ clean-man: endif # sqlmansectnum != 7 +# tabs are harmless, but it is best to avoid them in SGML files +check-tabs: + @( ! grep ' ' $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \ + (echo "Tabs appear in SGML/XML files" 1>&2; exit 1) + +# Non-breaking spaces are harmless, but it is best to avoid them in SGML files. +# Use perl command because non-GNU grep or sed could not have hex escape sequence. +check-nbsp: + @ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \ + $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \ + (echo "Non-breaking spaces appear in SGML/XML files" 1>&2; exit 1) + ## ## Clean ## diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build index ce0dea587cd..6ae192eac68 100644 --- a/doc/src/sgml/meson.build +++ b/doc/src/sgml/meson.build @@ -306,26 +306,3 @@ endif if alldocs.length() != 0 alias_target('alldocs', alldocs) endif - -sgml_syntax_check = files( - 'sgml_syntax_check.pl' -) - -test( - 'sgml_syntax_check', - perl, - protocol: 'exitcode', - suite: 'doc', - args: [ - sgml_syntax_check, - '--xmllint', - '@0@ --nonet'.format(xmllint_bin.full_path()), - '--srcdir', - meson.current_source_dir(), - '--builddir', - meson.current_build_dir(), - ], - depends: doc_generated -) - -testprep_targets += doc_generated diff --git a/doc/src/sgml/sgml_syntax_check.pl b/doc/src/sgml/sgml_syntax_check.pl deleted file mode 100755 index 2264700a453..00000000000 --- a/doc/src/sgml/sgml_syntax_check.pl +++ /dev/null @@ -1,77 +0,0 @@ -# /usr/bin/perl - -# Copyright (c) 2025, PostgreSQL Global Development Group - -# doc/src/sgml/sgml_syntax_check.pl - -use strict; -use warnings FATAL => 'all'; -use Getopt::Long; - -use File::Find; - -my $xmllint; -my $srcdir; -my $builddir; - -GetOptions( - 'xmllint:s' => \$xmllint, - 'srcdir:s' => \$srcdir, - 'builddir:s' => \$builddir) or die "$0: wrong arguments"; - -die "$0: --srcdir must be specified\n" unless defined $srcdir; - -my $xmlinclude = "--path . --path $srcdir"; -$xmlinclude .= " --path $builddir" if defined $builddir; - -# find files to process - all the sgml and xsl files (including in subdirectories) -my @files_to_process; -my @dirs_to_search = ($srcdir); -push @dirs_to_search, $builddir if defined $builddir; -find( - sub { - return unless -f $_; - return if $_ !~ /\.(sgml|xsl)$/; - push @files_to_process, $File::Find::name; - }, - @dirs_to_search,); - -# tabs and non-breaking spaces are harmless, but it is best to avoid them in SGML files -sub check_tabs_and_nbsp -{ - my $errors = 0; - for my $f (@files_to_process) - { - open my $fh, "<:encoding(UTF-8)", $f or die "Can't open $f: $!"; - my $line_no = 0; - while (<$fh>) - { - $line_no++; - if (/\t/) - { - print STDERR "Tab found in $f:$line_no\n"; - $errors++; - } - if (/\xC2\xA0/) - { - print STDERR "$f:$line_no: contains non-breaking space\n"; - $errors++; - } - } - close($fh); - } - - if ($errors) - { - die "Tabs and/or non-breaking spaces appear in SGML/XML files\n"; - } -} - -sub run_xmllint -{ - my $cmd = "$xmllint $xmlinclude --noout --valid postgres.sgml"; - system($cmd) == 0 or die "xmllint validation failed\n"; -} - -run_xmllint(); -check_tabs_and_nbsp(); -- 2.47.3