contrib/texi2pod.pl

   1 #! /usr/bin/perl -w
   2
   3 # This does trivial (and I mean _trivial_) conversion of Texinfo
   4 # markup to Perl POD format.  It's intended to be used to extract
   5 # something suitable for a manpage from a Texinfo document.
   6
   7 $output = 0;
   8 $skipping = 0;
   9 %sects = ();
  10 $section = "";
  11 @icstack = ();
  12 @endwstack = ();
  13 @skstack = ();
  14 $shift = "";
  15 %defs = ();
  16 $fnno = 1;
  17
  18 while ($_ = shift) {
  19     if (/^-D(.*)$/) {
  20         if ($1 ne "") {
  21             $flag = $1;
  22         } else {
  23             $flag = shift;
  24         }
  25         die "no flag specified for -D\n"
  26             unless $flag ne "";
  27         die "flags may only contain letters, digits, hyphens, and underscores\n"
  28             unless $flag =~ /^[a-zA-Z0-9_-]+$/;
  29         $defs{$flag} = "";
  30     } elsif (/^-/) {
  31         usage();
  32     } else {
  33         $in = $_, next unless defined $in;
  34         $out = $_, next unless defined $out;
  35         usage();
  36     }
  37 }
  38
  39 if (defined $in) {
  40     open(STDIN, $in) or die "opening \"$in\": $!\n";
  41 }
  42 if (defined $out) {
  43     open(STDOUT, ">$out") or die "opening \"$out\": $!\n";
  44 }
  45
  46 while(<STDIN>)
  47 {
  48     # Certain commands are discarded without further processing.
  49     /^\@(?:
  50          [a-z]+index            # @*index: useful only in complete manual
  51          |need                  # @need: useful only in printed manual
  52          |(?:end\s+)?group      # @group .. @end group: ditto
  53          |page                  # @page: ditto
  54          |node                  # @node: useful only in .info file
  55         )\b/x and next;
  56
  57     chomp;
  58
  59     # Look for filename and title markers.
  60     /^\@setfilename\s+([^.]+)/ and $fn = $1, next;
  61     /^\@settitle\s+([^.]+)/ and $tl = $1, next;
  62
  63     # Look for blocks surrounded by @c man begin SECTION ... @c man end.
  64     # This really oughta be @ifman ... @end ifman and the like, but such
  65     # would require rev'ing all other Texinfo translators.
  66     /^\@c man begin ([A-Z]+)/ and $sect = $1, $output = 1, next;
  67     /^\@c man end/ and do {
  68         $sects{$sect} = "" unless exists $sects{$sect};
  69         $sects{$sect} .= postprocess($section);
  70         $section = "";
  71         $output = 0;
  72         next;
  73     };
  74     next unless $output;
  75
  76     # Discard comments.  (Can't do it above, because then we'd never see
  77     # @c man lines.)
  78     /^\@c\b/ and next;
  79
  80     # End-block handler goes up here because it needs to operate even
  81     # if we are skipping.
  82     /^\@end\s+([a-z]+)/ and do {
  83         # Ignore @end foo, where foo is not an operation which may
  84         # cause us to skip, if we are presently skipping.
  85         my $ended = $1;
  86         next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu)$/;
  87
  88         die "\@end $ended without \@$ended at line $.\n" unless defined $endw;
  89         die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw;
  90
  91         $endw = pop @endwstack;
  92
  93         if ($ended =~ /^(?:ifset|ifclear|ignore|menu)$/) {
  94             $skipping = pop @skstack;
  95             next;
  96         } elsif ($ended =~ /^(?:example|smallexample)$/) {
  97             $shift = "";
  98             $_ = "";    # need a paragraph break
  99         } elsif ($ended =~ /^(?:itemize|enumerate|table)$/) {
 100             $_ = "\n=back\n";
 101             $ic = pop @icstack;
 102         } else {
 103             die "unknown command \@end $ended at line $.\n";
 104         }
 105     };
 106
 107     # We must handle commands which can cause skipping even while we
 108     # are skipping, otherwise we will not process nested conditionals
 109     # correctly.
 110     /^\@ifset\s+([a-zA-Z0-9_-]+)/ and do {
 111         push @endwstack, $endw;
 112         push @skstack, $skipping;
 113         $endw = "ifset";
 114         $skipping = 1 unless exists $defs{$1};
 115         next;
 116     };
 117
 118     /^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do {
 119         push @endwstack, $endw;
 120         push @skstack, $skipping;
 121         $endw = "ifclear";
 122         $skipping = 1 if exists $defs{$1};
 123         next;
 124     };
 125
 126     /^\@(ignore|menu)\b/ and do {
 127         push @endwstack, $endw;
 128         push @skstack, $skipping;
 129         $endw = $1;
 130         $skipping = 1;
 131         next;
 132     };
 133
 134     next if $skipping;
 135
 136     # Character entities.  First the ones that can be replaced by raw text
 137     # or discarded outright:
 138     s/\@copyright\{\}/(c)/g;
 139     s/\@dots\{\}/.../g;
 140     s/\@enddots\{\}/..../g;
 141     s/\@([.!? ])/$1/g;
 142     s/\@[:-]//g;
 143     s/\@bullet(?:\{\})?/*/g;
 144     s/\@TeX\{\}/TeX/g;
 145     s/\@pounds\{\}/\#/g;
 146     s/\@minus(?:\{\})?/-/g;
 147
 148     # Now the ones that have to be replaced by special escapes
 149     # (which will be turned back into text by unmunge())
 150     s/&/&amp;/g;
 151     s/\@\{/&lbrace;/g;
 152     s/\@\}/&rbrace;/g;
 153     s/\@\@/&at;/g;
 154     # POD doesn't interpret E<> inside a verbatim block.
 155     if ($shift eq "") {
 156         s/</&lt;/g;
 157         s/>/&gt;/g;
 158     } else {
 159         s/</&LT;/g;
 160         s/>/&GT;/g;
 161     }
 162
 163     # Single line command handlers.
 164     /^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and $defs{$1} = $2, next;
 165     /^\@clear\s+([a-zA-Z0-9_-]+)/ and delete $defs{$1}, next;
 166
 167     /^\@section\s+(.+)$/ and $_ = "\n=head2 $1\n";
 168     /^\@subsection\s+(.+)$/ and $_ = "\n=head3 $1\n";
 169
 170     # Block command handlers:
 171     /^\@itemize\s+(\@[a-z]+|\*|-)/ and do {
 172         push @endwstack, $endw;
 173         push @icstack, $ic;
 174         $ic = $1;
 175         $_ = "\n=over 4\n";
 176         $endw = "itemize";
 177     };
 178
 179     /^\@enumerate(?:\s+([A-Z0-9]+))?/ and do {
 180         push @endwstack, $endw;
 181         push @icstack, $ic;
 182         if (defined $1) {
 183             $ic = $1 . ".";
 184         } else {
 185             $ic = "1.";
 186         }
 187         $_ = "\n=over 4\n";
 188         $endw = "enumerate";
 189     };
 190
 191     /^\@table\s+(\@[a-z]+)/ and do {
 192         push @endwstack, $endw;
 193         push @icstack, $ic;
 194         $ic = $1;
 195         $ic =~ s/\@(?:samp|strong|key|gcctabopt|env)/B/;
 196         $ic =~ s/\@(?:code|kbd)/C/;
 197         $ic =~ s/\@(?:dfn|var|emph|cite|i)/I/;
 198         $ic =~ s/\@(?:file)/F/;
 199         $_ = "\n=over 4\n";
 200         $endw = "table";
 201     };
 202
 203     /^\@((?:small)?example)/ and do {
 204         push @endwstack, $endw;
 205         $endw = $1;
 206         $shift = "\t";
 207         $_ = "";        # need a paragraph break
 208     };
 209
 210     /^\@itemx?\s*(.+)?$/ and do {
 211         if (defined $1) {
 212             # Entity escapes prevent munging by the <> processing below.
 213             $_ = "\n=item $ic\&LT;$1\&GT;\n";
 214         } else {
 215             $_ = "\n=item $ic\n";
 216             $ic =~ y/A-Ya-y1-8/B-Zb-z2-9/;
 217         }
 218     };
 219
 220     $section .= $shift.$_."\n";
 221 }
 222
 223 die "No filename or title\n" unless defined $fn && defined $tl;
 224
 225 $sects{NAME} = "$fn \- $tl\n";
 226 $sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES};
 227
 228 for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS ENVIRONMENT FILES
 229               BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
 230     if(exists $sects{$sect}) {
 231         $head = $sect;
 232         $head =~ s/SEEALSO/SEE ALSO/;
 233         print "=head1 $head\n\n";
 234         print scalar unmunge ($sects{$sect});
 235         print "\n";
 236     }
 237 }
 238
 239 sub usage
 240 {
 241     die "usage: $0 [-D toggle...] [infile [outfile]]\n";
 242 }
 243
 244 sub postprocess
 245 {
 246     local $_ = $_[0];
 247
 248     # @value{foo} is replaced by whatever 'foo' is defined as.
 249     s/\@value\{([a-zA-Z0-9_-]+)\}/$defs{$1}/g;
 250
 251     # Formatting commands.
 252     # Temporary escape for @r.
 253     s/\@r\{([^\}]*)\}/R<$1>/g;
 254     s/\@(?:dfn|var|emph|cite|i)\{([^\}]*)\}/I<$1>/g;
 255     s/\@(?:code|kbd)\{([^\}]*)\}/C<$1>/g;
 256     s/\@(?:gccoptlist|samp|strong|key|option|env|command|b)\{([^\}]*)\}/B<$1>/g;
 257     s/\@sc\{([^\}]*)\}/\U$1/g;
 258     s/\@file\{([^\}]*)\}/F<$1>/g;
 259     s/\@w\{([^\}]*)\}/S<$1>/g;
 260     s/\@(?:dmn|math)\{([^\}]*)\}/$1/g;
 261
 262     # Handle @r inside bold.
 263     1 while s/B<((?:[^<>]*|I<[^<>*]*>)*)R<([^>]*)>/B<$1>${2}B</g;
 264
 265     # Cross references are thrown away, as are @noindent and @refill.
 266     # (@noindent is impossible in .pod, and @refill is unnecessary.)
 267     # @* is also impossible in .pod; we discard it and any newline that
 268     # follows it.  Similarly, our macro @gol must be discarded.
 269
 270     s/\(?\@xref\{(?:[^\}]*)\}(?:[^.<]|(?:<[^<>]*>))*\.\)?//g;
 271     s/\s+\(\@pxref\{(?:[^\}]*)\}\)//g;
 272     s/;\s+\@pxref\{(?:[^\}]*)\}//g;
 273     s/\@noindent\s*//g;
 274     s/\@refill//g;
 275     s/\@gol//g;
 276     s/\@\*\s*\n?//g;
 277
 278     # @uref can take one, two, or three arguments, with different
 279     # semantics each time.  @url and @email are just like @uref with
 280     # one argument, for our purposes.
 281     s/\@(?:uref|url|email)\{([^\},]*)\}/&lt;B<$1>&gt;/g;
 282     s/\@uref\{([^\},]*),([^\},]*)\}/$2 (C<$1>)/g;
 283     s/\@uref\{([^\},]*),([^\},]*),([^\},]*)\}/$3/g;
 284
 285     # Turn B<blah I<blah> blah> into B<blah> I<blah> B<blah> to
 286     # match Texinfo semantics of @emph inside @samp.
 287     s/&LT;/</g;
 288     s/&GT;/>/g;
 289     1 while (s/B<([^<>]*)I<([^>]+)>/B<$1>I<$2>B</g);
 290     1 while (s/I<([^<>]*)B<([^>]+)>/I<$1>B<$2>I</g);
 291     s/[BI]<>//g;
 292     s/([BI])<(\s+)([^>]+)>/$2$1<$3>/g;
 293     s/([BI])<([^>]+?)(\s+)>/$1<$2>$3/g;
 294
 295     # Extract footnotes.  This has to be done after all other
 296     # processing because otherwise the regexp will choke on formatting
 297     # inside @footnote.
 298     while (/\@footnote/g) {
 299         s/\@footnote\{([^\}]+)\}/[$fnno]/;
 300         add_footnote($1, $fnno);
 301         $fnno++;
 302     }
 303
 304     return $_;
 305 }
 306
 307 sub unmunge
 308 {
 309     # Replace escaped symbols with their equivalents.
 310     local $_ = $_[0];
 311
 312     s/&lt;/E<lt>/g;
 313     s/&gt;/E<gt>/g;
 314     s/&lbrace;/\{/g;
 315     s/&rbrace;/\}/g;
 316     s/&at;/\@/g;
 317     s/&amp;/&/g;
 318     return $_;
 319 }
 320
 321 sub add_footnote
 322 {
 323     unless (exists $sects{FOOTNOTES}) {
 324         $sects{FOOTNOTES} = "\n=over 4\n\n";
 325     }
 326
 327     $sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++;
 328     $sects{FOOTNOTES} .= $_[0];
 329     $sects{FOOTNOTES} .= "\n\n";
 330 }
 331