]> git.ipfire.org Git - thirdparty/cups.git/blob - man/mantohtml.c
Initial work on man page modernization (STR #4372)
[thirdparty/cups.git] / man / mantohtml.c
1 /*
2 * "$Id$"
3 *
4 * Man page to HTML conversion program.
5 *
6 * Copyright 2007-2010, 2014 by Apple Inc.
7 * Copyright 2004-2006 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * file is missing or damaged, see the license at "http://www.cups.org/".
14 */
15
16 /*
17 * Include necessary headers.
18 */
19
20 #include <cups/string-private.h>
21 #include <cups/array-private.h>
22 #include <unistd.h>
23
24
/*
 * Local globals...
 *
 * Tag tables indexed by font number; entry 0 is the empty string, entry 1
 * is bold and entry 2 is italic.
 */

static const char * const start_fonts[] =  /* Opening tag for each font */
{
  "",
  "<b>",
  "<i>"
};

static const char * const end_fonts[] =    /* Closing tag for each font */
{
  "",
  "</b>",
  "</i>"
};


/*
 * Local functions...
 */

static void html_alternate(const char *s,
                           const char *first,
                           const char *second,
                           FILE       *fp);
static void html_fputs(const char *s,
                       int        *font,
                       FILE       *fp);
static void html_putc(int  ch,
                      FILE *fp);
static void strmove(char       *d,
                    const char *s);
42
43
44 /*
45 * 'main()' - Convert a man page to HTML.
46 */
47
48 int /* O - Exit status */
49 main(int argc, /* I - Number of command-line args */
50 char *argv[]) /* I - Command-line arguments */
51 {
52 FILE *infile, /* Input file */
53 *outfile; /* Output file */
54 char line[1024], /* Line from file */
55 *lineptr, /* Pointer into line */
56 name[1024]; /* Man page name */
57 int section = -1, /* Man page section */
58 pre = 0, /* Preformatted */
59 font = 0, /* Current font */
60 linenum = 0; /* Current line number */
61 const char *list = NULL; /* Current list, if any */
62 const char *post = NULL; /* Text to add after the current line */
63
64
65 /*
66 * Check arguments...
67 */
68
69 if (argc > 3)
70 {
71 fputs("Usage: mantohtml [filename.man [filename.html]]\n", stderr);
72 return (1);
73 }
74
75 /*
76 * Open files as needed...
77 */
78
79 if (argc > 1)
80 {
81 if ((infile = fopen(argv[1], "r")) == NULL)
82 {
83 perror(argv[1]);
84 return (1);
85 }
86 }
87 else
88 infile = stdin;
89
90 if (argc > 2)
91 {
92 if ((outfile = fopen(argv[2], "w")) == NULL)
93 {
94 perror(argv[2]);
95 fclose(infile);
96 return (1);
97 }
98 }
99 else
100 outfile = stdout;
101
102 /*
103 * Read from input and write the output...
104 */
105
106 fputs("<!DOCTYPE HTML>\n"
107 "<html>\n"
108 "<!-- SECTION: Man Pages -->\n"
109 "<head>\n"
110 "\t<link rel=\"stylesheet\" type=\"text/css\" "
111 "href=\"../cups-printable.css\">\n", outfile);
112
113 while (fgets(line, sizeof(line), infile))
114 {
115 size_t linelen = strlen(line); /* Length of line */
116
117 if (linelen > 0 && line[linelen - 1] == '\n')
118 line[linelen - 1] = '\0';
119
120 linenum ++;
121
122 if (line[0] == '.')
123 {
124 /*
125 * Strip leading whitespace...
126 */
127
128 while (line[1] == ' ' || line[1] == '\t')
129 strmove(line + 1, line + 2);
130
131 /*
132 * Process man page commands...
133 */
134
135 if (!strncmp(line, ".TH ", 4) && section < 0)
136 {
137 /*
138 * Grab man page title...
139 */
140
141 sscanf(line + 4, "%s%d", name, &section);
142
143 fprintf(outfile,
144 "\t<title>%s(%d)</title>\n"
145 "</head>\n"
146 "<body>\n"
147 "<h1 class=\"title\">%s(%d)</h1>\n"
148 "%s",
149 name, section, name, section, start_fonts[font]);
150 }
151 else if (section < 0)
152 continue;
153 else if (!strncmp(line, ".SH ", 4) || !strncmp(line, ".SS ", 4))
154 {
155 /*
156 * Grab heading...
157 */
158
159 int first = 1;
160
161 fputs(end_fonts[font], outfile);
162 font = 0;
163
164 if (list)
165 {
166 fprintf(outfile, "</%s>\n", list);
167 list = NULL;
168 }
169
170 if (line[2] == 'H')
171 fputs("<h2 class=\"title\"><a name=\"", outfile);
172 else
173 fputs("<h3><a name=\"", outfile);
174
175 for (lineptr = line + 4; *lineptr; lineptr ++)
176 if (*lineptr == '\"')
177 continue;
178 else if (isalnum(*lineptr & 255))
179 html_putc(*lineptr, outfile);
180 else
181 html_putc('_', outfile);
182
183 fputs("\">", outfile);
184
185 for (lineptr = line + 4; *lineptr; lineptr ++)
186 {
187 if (*lineptr == '\"')
188 continue;
189 else if (*lineptr == ' ')
190 {
191 html_putc(' ', outfile);
192
193 first = 1;
194 }
195 else
196 {
197 if (first)
198 html_putc(*lineptr, outfile);
199 else
200 html_putc(tolower(*lineptr & 255), outfile);
201
202 first = 0;
203 }
204 }
205
206 if (line[2] == 'H')
207 fputs("</a></h2>\n", outfile);
208 else
209 fputs("</a></h3>\n", outfile);
210 }
211 else if (!strncmp(line, ".B ", 3))
212 {
213 /*
214 * Grab bold text...
215 */
216
217 fputs(end_fonts[font], outfile);
218 font = 0;
219
220 html_alternate(line + 3, "b", "b", outfile);
221 }
222 else if (!strncmp(line, ".I ", 3))
223 {
224 /*
225 * Grab italic text...
226 */
227
228 fputs(end_fonts[font], outfile);
229 font = 0;
230
231 html_alternate(line + 3, "i", "i", outfile);
232 }
233 else if (!strncmp(line, ".BI ", 4))
234 {
235 /*
236 * Alternating bold and italic text...
237 */
238
239 fputs(end_fonts[font], outfile);
240 font = 0;
241
242 html_alternate(line + 4, "b", "i", outfile);
243 }
244 else if (!strncmp(line, ".BR ", 4))
245 {
246 /*
247 * Alternating bold and roman (plain) text...
248 */
249
250 fputs(end_fonts[font], outfile);
251 font = 0;
252
253 html_alternate(line + 4, "b", NULL, outfile);
254 }
255 else if (!strncmp(line, ".IB ", 4))
256 {
257 /*
258 * Alternating italic and bold text...
259 */
260
261 fputs(end_fonts[font], outfile);
262 font = 0;
263
264 html_alternate(line + 4, "i", "b", outfile);
265 }
266 else if (!strncmp(line, ".IR ", 4))
267 {
268 /*
269 * Alternating italic and roman (plain) text...
270 */
271
272 fputs(end_fonts[font], outfile);
273 font = 0;
274
275 html_alternate(line + 4, "i", NULL, outfile);
276 }
277 else if (!strncmp(line, ".RB ", 4))
278 {
279 /*
280 * Alternating roman (plain) and bold text...
281 */
282
283 fputs(end_fonts[font], outfile);
284 font = 0;
285
286 html_alternate(line + 4, NULL, "b", outfile);
287 }
288 else if (!strncmp(line, ".RI ", 4))
289 {
290 /*
291 * Alternating roman (plain) and italic text...
292 */
293
294 fputs(end_fonts[font], outfile);
295 font = 0;
296
297 html_alternate(line + 4, NULL, "i", outfile);
298 }
299 else if (!strncmp(line, ".SB ", 4))
300 {
301 /*
302 * Alternating small and bold text...
303 */
304
305 fputs(end_fonts[font], outfile);
306 font = 0;
307
308 html_alternate(line + 4, "small", "b", outfile);
309 }
310 else if (!strncmp(line, ".SM ", 4))
311 {
312 /*
313 * Small text...
314 */
315
316 fputs(end_fonts[font], outfile);
317 font = 0;
318
319 html_alternate(line + 4, "small", "small", outfile);
320 }
321 else if (!strcmp(line, ".LP") || !strcmp(line, ".PP") || !strcmp(line, ".P"))
322 {
323 /*
324 * New paragraph...
325 */
326
327 fputs(end_fonts[font], outfile);
328 font = 0;
329
330 if (list)
331 {
332 fprintf(outfile, "</%s>\n", list);
333 list = NULL;
334 }
335
336 fputs("<p>", outfile);
337 }
338 else if (!strcmp(line, ".RS") || !strncmp(line, ".RS ", 4))
339 {
340 /*
341 * Indent...
342 */
343
344 float amount = 3.0; /* Indentation */
345
346 if (line[3])
347 amount = atof(line + 4);
348
349 fputs(end_fonts[font], outfile);
350 font = 0;
351
352 if (list)
353 {
354 fprintf(outfile, "</%s>\n", list);
355 list = NULL;
356 }
357
358 fprintf(outfile, "<div style=\"margin-left: %.1fem;\">\n", amount);
359 }
360 else if (!strcmp(line, ".RE"))
361 {
362 /*
363 * Unindent...
364 */
365
366 fputs(end_fonts[font], outfile);
367 font = 0;
368
369 fputs("</div>\n", outfile);
370 }
371 else if (!strcmp(line, ".HP") || !strncmp(line, ".HP ", 4) ||
372 !strcmp(line, ".TP") || !strncmp(line, ".TP ", 4))
373 {
374 /*
375 * Hanging paragraph/tagged list...
376 *
377 * .HP i
378 * .TP i
379 */
380
381 float amount = 3.0; /* Indentation */
382
383 if (line[3])
384 amount = atof(line + 4);
385
386 fputs(end_fonts[font], outfile);
387 font = 0;
388
389 if (list)
390 {
391 fprintf(outfile, "</%s>\n", list);
392 list = NULL;
393 }
394
395 fprintf(outfile, "<p style=\"margin-left: %.1fem; text-indent: %.1fem\">", amount, -amount);
396
397 if (line[1] == 'T')
398 post = "<br>\n";
399 }
400 else if (!strncmp(line, ".IP ", 4))
401 {
402 /*
403 * Indented paragraph...
404 *
405 * .IP x i
406 */
407
408 float amount = 3.0; /* Indentation */
409 const char *newlist = NULL; /* New list style */
410 const char *newtype = NULL; /* New list numbering type */
411
412 fputs(end_fonts[font], outfile);
413 font = 0;
414
415 lineptr = line + 4;
416 while (isspace(*lineptr & 255))
417 lineptr ++;
418
419 if (!strncmp(lineptr, "\\(bu", 4) || !strncmp(lineptr, "\\(em", 4))
420 {
421 /*
422 * Bullet list...
423 */
424
425 newlist = "ul";
426 }
427 else if (isdigit(*lineptr & 255))
428 {
429 /*
430 * Numbered list...
431 */
432
433 newlist = "ol";
434 }
435 else if (islower(*lineptr & 255))
436 {
437 /*
438 * Lowercase alpha list...
439 */
440
441 newlist = "ol";
442 newtype = "a";
443 }
444 else if (isupper(*lineptr & 255))
445 {
446 /*
447 * Lowercase alpha list...
448 */
449
450 newlist = "ol";
451 newtype = "A";
452 }
453
454 while (!isspace(*lineptr & 255))
455 lineptr ++;
456 while (isspace(*lineptr & 255))
457 lineptr ++;
458
459 if (isdigit(*lineptr & 255))
460 amount = atof(lineptr);
461
462 if (newlist && list && strcmp(newlist, list))
463 {
464 fprintf(outfile, "</%s>\n", list);
465 list = NULL;
466 }
467
468 if (newlist && !list)
469 {
470 if (newtype)
471 fprintf(outfile, "<%s type=\"%s\">\n", newlist, newtype);
472 else
473 fprintf(outfile, "<%s>\n", newlist);
474
475 list = newlist;
476 }
477
478 if (list)
479 fprintf(outfile, "<li style=\"margin-left: %.1fem;\">", amount);
480 else
481 fprintf(outfile, "<p style=\"margin-left: %.1fem;\">", amount);
482 }
483 else if (!strncmp(line, ".br", 3))
484 {
485 /*
486 * Grab line break...
487 */
488
489 fputs("<br>\n", outfile);
490 }
491 else if (!strncmp(line, ".de ", 4))
492 {
493 /*
494 * Define macro - ignore...
495 */
496
497 while (fgets(line, sizeof(line), infile))
498 {
499 linenum ++;
500
501 if (!strncmp(line, "..", 2))
502 break;
503 }
504 }
505 else if (!strncmp(line, ".ds ", 4) || !strncmp(line, ".rm ", 4) ||
506 !strncmp(line, ".tr ", 4) || !strncmp(line, ".hy ", 4) ||
507 !strncmp(line, ".IX ", 4) || !strncmp(line, ".PD", 3) ||
508 !strncmp(line, ".Sp", 3))
509 {
510 /*
511 * Ignore unused commands...
512 */
513 }
514 else if (!strncmp(line, ".Vb", 3) || !strncmp(line, ".nf", 3))
515 {
516 /*
517 * Start preformatted...
518 */
519
520 fputs(end_fonts[font], outfile);
521 font = 0;
522
523 if (list)
524 {
525 fprintf(outfile, "</%s>\n", list);
526 list = NULL;
527 }
528
529 pre = 1;
530 fputs("<pre>\n", outfile);
531 }
532 else if (!strncmp(line, ".Ve", 3) || !strncmp(line, ".fi", 3))
533 {
534 /*
535 * End preformatted...
536 */
537
538 fputs(end_fonts[font], outfile);
539 font = 0;
540
541 if (pre)
542 {
543 pre = 0;
544 fputs("</pre>\n", outfile);
545 }
546 }
547 else if (!strncmp(line, ".\\}", 3))
548 {
549 /*
550 * Ignore close block...
551 */
552 }
553 else if (!strncmp(line, ".ie", 3) || !strncmp(line, ".if", 3) ||
554 !strncmp(line, ".el", 3))
555 {
556 /*
557 * If/else - ignore...
558 */
559
560 if (strchr(line, '{') != NULL)
561 {
562 /*
563 * Skip whole block...
564 */
565
566 while (fgets(line, sizeof(line), infile))
567 {
568 linenum ++;
569
570 if (strchr(line, '}') != NULL)
571 break;
572 }
573 }
574 }
575 #if 0
576 else if (!strncmp(line, ". ", 4))
577 {
578 /*
579 * Grab ...
580 */
581 }
582 #endif /* 0 */
583 else if (strncmp(line, ".\\\"", 3))
584 {
585 /*
586 * Unknown...
587 */
588
589 if ((lineptr = strchr(line, ' ')) != NULL)
590 *lineptr = '\0';
591 else if ((lineptr = strchr(line, '\n')) != NULL)
592 *lineptr = '\0';
593
594 fprintf(stderr, "mantohtml: Unknown man page command \'%s\' on line %d.\n", line, linenum);
595 }
596
597 /*
598 * Skip continuation lines...
599 */
600
601 lineptr = line + strlen(line) - 1;
602 if (lineptr >= line && *lineptr == '\\')
603 {
604 while (fgets(line, sizeof(line), infile))
605 {
606 linenum ++;
607 lineptr = line + strlen(line) - 2;
608
609 if (lineptr < line || *lineptr != '\\')
610 break;
611 }
612 }
613 }
614 else
615 {
616 /*
617 * Process man page text...
618 */
619
620 html_fputs(line, &font, outfile);
621 putc('\n', outfile);
622
623 if (post)
624 {
625 fputs(post, outfile);
626 post = NULL;
627 }
628 }
629 }
630
631 fprintf(outfile, "%s\n", end_fonts[font]);
632 font = 0;
633
634 if (list)
635 {
636 fprintf(outfile, "</%s>\n", list);
637 list = NULL;
638 }
639
640 fputs("</body>\n"
641 "</html>\n", outfile);
642
643 /*
644 * Close files...
645 */
646
647 if (infile != stdin)
648 fclose(infile);
649
650 if (outfile != stdout)
651 fclose(outfile);
652
653 /*
654 * Return with no errors...
655 */
656
657 return (0);
658 }
659
660
/*
 * 'html_alternate()' - Alternate words between two styles of text.
 *
 * Splits "s" into whitespace-separated words and writes them to "fp",
 * wrapping the 1st, 3rd, ... words in <first> tags and the 2nd, 4th, ...
 * words in <second> tags.  A NULL style leaves those words unwrapped.
 * Double quotes group several words into one and are not written; a
 * backslash writes the following character as-is.  A newline is written
 * after the last word.
 *
 * When "first" is "b" and "second" is NULL, a word followed by a word
 * starting with '(' is wrapped in a link to "man-NAME.html" if the file
 * "NAME.man" exists relative to the current directory.
 */

static void
html_alternate(const char *s,           /* I - String */
               const char *first,       /* I - First style or NULL */
               const char *second,      /* I - Second style or NULL */
               FILE       *fp)          /* I - File */
{
  int           i = 0;                  /* Which style */
  int           quote = 0;              /* Saw quote? */
  int           dolinks,                /* Do hyperlinks to other man pages? */
                link = 0;               /* Doing a link now? */


 /*
  * Skip leading whitespace...
  */

  while (isspace(*s & 255))
    s ++;

  dolinks = first && !strcmp(first, "b") && !second;

  while (*s)
  {
    if (!i && dolinks)
    {
     /*
      * See if we need to make a link to a man page...
      */

      const char *end;                  /* End of current word */
      const char *next;                 /* Start of next word */

      for (end = s; *end && !isspace(*end & 255); end ++);
      for (next = end; isspace(*next & 255); next ++);

      if (isalnum(*s & 255) && *next == '(')
      {
       /*
        * See if the man file is available locally...
        */

        char    name[1024],             /* Name */
                manfile[1024],          /* Man page filename */
                manurl[1024];           /* Man page URL */

        strlcpy(name, s, sizeof(name));
        if ((size_t)(end - s) < sizeof(name))
          name[end - s] = '\0';

        snprintf(manfile, sizeof(manfile), "%s.man", name);
        snprintf(manurl, sizeof(manurl), "man-%s.html?TOPIC=Man+Pages", name);

        if (!access(manfile, 0))
        {
         /*
          * Local man page, do a link...
          */

          fprintf(fp, "<a href=\"%s\">", manurl);
          link = 1;
        }
      }
    }

    if (!i && first)
      fprintf(fp, "<%s>", first);
    else if (i && second)
      fprintf(fp, "<%s>", second);

    while ((!isspace(*s & 255) || quote) && *s)
    {
      if (*s == '\"')
      {
       /*
        * Toggle quoting and consume the quote; without the advance the
        * loop would see the same character again and never terminate.
        */

        quote = !quote;
        s ++;
      }
      else if (*s == '\\' && s[1])
      {
        s ++;
        html_putc(*s++, fp);
      }
      else
        html_putc(*s++, fp);
    }

    if (!i && first)
      fprintf(fp, "</%s>", first);
    else if (i && second)
      fprintf(fp, "</%s>", second);

   /*
    * A link opened on the first word stays open through the second word
    * (the section in parentheses) and is closed here...
    */

    if (i && link)
    {
      fputs("</a>", fp);
      link = 0;
    }

    i = 1 - i;

   /*
    * Skip trailing whitespace...
    */

    while (isspace(*s & 255))
      s ++;

   /*
    * Separate words with a space unless the next one starts with '('...
    */

    if (*s && *s != '(')
      putc(' ', fp);
  }

  putc('\n', fp);
}
773
774 /*
775 * 'html_fputs()' - Output a string, quoting as needed HTML entities.
776 */
777
778 static void
779 html_fputs(const char *s, /* I - String */
780 int *font, /* IO - Font */
781 FILE *fp) /* I - File */
782 {
783 while (*s)
784 {
785 if (*s == '\\')
786 {
787 s ++;
788 if (!*s)
789 break;
790
791 if (*s == 'f')
792 {
793 int newfont; /* New font */
794
795 s ++;
796 if (!*s)
797 break;
798
799 if (!font)
800 {
801 s ++;
802 continue;
803 }
804
805 switch (*s++)
806 {
807 case 'R' :
808 case 'P' :
809 newfont = 0;
810 break;
811
812 case 'b' :
813 case 'B' :
814 newfont = 1;
815 break;
816
817 case 'i' :
818 case 'I' :
819 newfont = 2;
820 break;
821
822 default :
823 fprintf(stderr, "mantohtml: Unknown font \"\\f%c\" ignored.\n", s[-1]);
824 newfont = *font;
825 break;
826 }
827
828 if (newfont != *font)
829 {
830 fputs(end_fonts[*font], fp);
831 *font = newfont;
832 fputs(start_fonts[*font], fp);
833 }
834 }
835 else if (*s == '*')
836 {
837 /*
838 * Substitute macro...
839 */
840
841 s ++;
842 if (!*s)
843 break;
844
845 switch (*s++)
846 {
847 case 'R' :
848 fputs("&reg;", fp);
849 break;
850
851 case '(' :
852 if (!strncmp(s, "lq", 2))
853 fputs("&ldquo;", fp);
854 else if (!strncmp(s, "rq", 2))
855 fputs("&rdquo;", fp);
856 else if (!strncmp(s, "Tm", 2))
857 fputs("<sup>TM</sup>", fp);
858 else
859 fprintf(stderr, "mantohtml: Unknown macro \"\\*(%2s\" ignored.\n", s);
860
861 if (*s)
862 s ++;
863 if (*s)
864 s ++;
865 break;
866
867 default :
868 fprintf(stderr, "mantohtml: Unknown macro \"\\*%c\" ignored.\n", s[-1]);
869 break;
870 }
871 }
872 else if (*s == '[')
873 {
874 /*
875 * Substitute escaped character...
876 */
877
878 s ++;
879 if (!strncmp(s, "co]", 3))
880 fputs("&copy;", fp);
881 else if (!strncmp(s, "de]", 3))
882 fputs("&deg;", fp);
883 else if (!strncmp(s, "rg]", 3))
884 fputs("&reg;", fp);
885 else if (!strncmp(s, "tm]", 3))
886 fputs("<sup>TM</sup>", fp);
887
888 if (*s)
889 s ++;
890 if (*s)
891 s ++;
892 if (*s)
893 s ++;
894 }
895 else if (isdigit(s[0]) && isdigit(s[1]) &&
896 isdigit(s[2]))
897 {
898 fprintf(fp, "&#%d;", ((s[0] - '0') * 8 + s[1] - '0') * 8 + s[2] - '0');
899 s += 3;
900 }
901 else
902 {
903 if (*s != '\\' && *s == '\"' && *s == '\'' && *s == '-')
904 fprintf(stderr, "mantohtml: Unrecognized escape \"\\%c\" ignored.\n", *s);
905
906 html_putc(*s++, fp);
907 }
908 }
909 else if (!strncmp(s, "http://", 7) || !strncmp(s, "https://", 8) || !strncmp(s, "ftp://", 6))
910 {
911 /*
912 * Embed URL...
913 */
914
915 const char *end = s + 6; /* End of URL */
916
917 while (*end && !isspace(*end & 255))
918 end ++;
919
920 fprintf(fp, "<a href=\"%*s\">%*s</a>", (int)(end - s), s, (int)(end - s), s);
921 s = end;
922 }
923 else
924 html_putc(*s++ & 255, fp);
925 }
926 }
927
928
/*
 * 'html_putc()' - Put a single character, using entities as needed.
 *
 * '&' is written as "&amp;" and '<' as "&lt;"; every other character is
 * written unchanged.
 */

static void
html_putc(int  ch,                      /* I - Character */
          FILE *fp)                     /* I - File */
{
  switch (ch)
  {
    case '&' :
        fputs("&amp;", fp);
        break;

    case '<' :
        fputs("&lt;", fp);
        break;

    default :
        putc(ch, fp);
        break;
  }
}
944
945
/*
 * 'strmove()' - Move characters within a string.
 *
 * Copies the nul-terminated string at "s" to "d" one character at a time,
 * front to back, then terminates the destination.
 */

static void
strmove(char       *d,                  /* I - Destination */
        const char *s)                  /* I - Source */
{
  for (; *s != '\0'; d ++, s ++)
    *d = *s;

  *d = '\0';
}
959
960
961 /*
962 * End of "$Id$".
963 */