]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - readline/doc/texindex.c
Initial revision
[thirdparty/binutils-gdb.git] / readline / doc / texindex.c
CommitLineData
be9485d5
SG
1/* Prepare Tex index dribble output into an actual index.
2 Copyright (C) 1987 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 1, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
17
18#include <stdio.h>
19#include <ctype.h>
20#include <errno.h>
21extern int errno;
22
23#ifdef VMS
24#ifndef VAX11C
25#define noshare
26#endif
27
28#include <perror.h>
29#include <file.h>
30
31#define EXIT_SUCCESS ((1 << 28) | 1)
32#define EXIT_FATAL ((1 << 28) | 4)
33#define unlink delete
34#define tell(fd) lseek(fd, 0L, 1)
35
36#else /* Not VMS */
37
38#ifdef USG
39#include <sys/types.h>
40#include <sys/fcntl.h>
41#endif
42#include <sys/file.h>
43
44#define EXIT_SUCCESS 0
45#define EXIT_FATAL 1
46
47#endif /* Not VMS */
48
49
50#ifndef L_XTND
51#define L_XTND 2
52#endif
53
54#ifdef VMS
55extern noshare int sys_nerr;
56extern noshare char *sys_errlist[];
57#else
58extern int sys_nerr;
59extern char *sys_errlist[];
60#endif
61
62/* When sorting in core, this structure describes one line
63 and the position and length of its first keyfield. */
64
65struct lineinfo
66 {
67 char *text; /* The actual text of the line */
68 union
69 { /* The start of the key (for textual comparison) */
70 char *text;
71 long number; /* or the numeric value (for numeric comparison) */
72 } key;
73 long keylen; /* Length of key field */
74 };
75
76/* This structure describes a field to use as a sort key */
77
78struct keyfield
79 {
80 int startwords; /* # words to skip */
81 int startchars; /* and # additional chars to skip, to start of field */
82 int endwords; /* similar, from beg (or end) of line, to find end of field */
83 int endchars;
84 char ignore_blanks; /* Ignore spaces and tabs within the field */
85 char fold_case; /* Convert upper case to lower before comparing */
86 char reverse; /* Compare in reverse order */
87 char numeric; /* Parse text as an integer and compare the integers */
88 char positional; /* Sort according to position within the file */
89 char braced; /* Count balanced-braced groupings as fields */
90 };
91
92/* Vector of keyfields to use */
93
94struct keyfield keyfields[3];
95
96/* Number of keyfields stored in that vector. */
97
98int num_keyfields = 3;
99
100/* Vector of input file names, terminated with a zero (null pointer) */
101
102char **infiles;
103
104/* Vector of corresponding output file names, or zero meaning default it */
105
106char **outfiles;
107
108/* Length of `infiles' */
109
110int num_infiles;
111
112/* Pointer to the array of pointers to lines being sorted */
113
114char **linearray;
115
116/* The allocated length of `linearray'. */
117
118long nlines;
119
120/* Directory to use for temporary files. On Unix, it ends with a slash. */
121
122char *tempdir;
123
124/* Start of filename to use for temporary files. */
125
126char *tempbase;
127
128/* Number of last temporary file. */
129
130int tempcount;
131
132/* Number of last temporary file already deleted.
133 Temporary files are deleted by `flush_tempfiles' in order of creation. */
134
135int last_deleted_tempcount;
136
137/* During in-core sort, this points to the base of the data block
138 which contains all the lines of data. */
139
140char *text_base;
141
142/* Additional command switches */
143
144int keep_tempfiles; /* Nonzero means do not delete tempfiles -- for debugging */
145
146/* Forward declarations of functions in this file */
147
148void decode_command ();
149void sort_in_core ();
150void sort_offline ();
151char **parsefile ();
152char *find_field ();
153char *find_pos ();
154long find_value ();
155char *find_braced_pos ();
156char *find_braced_end ();
157void writelines ();
158int compare_full ();
159long readline ();
160int merge_files ();
161int merge_direct ();
162char *concat ();
163char *maketempname ();
164void flush_tempfiles ();
165char *tempcopy ();
166
167extern char *mktemp ();
168\f
169#define MAX_IN_CORE_SORT 500000
170
171int
172main (argc, argv)
173 int argc;
174 char **argv;
175{
176 int i;
177
178 tempcount = 0;
179 last_deleted_tempcount = 0;
180
181 /* Describe the kind of sorting to do. */
182 /* The first keyfield uses the first braced field and folds case */
183 keyfields[0].braced = 1;
184 keyfields[0].fold_case = 1;
185 keyfields[0].endwords = -1;
186 keyfields[0].endchars = -1;
187 /* The second keyfield uses the second braced field, numerically */
188 keyfields[1].braced = 1;
189 keyfields[1].numeric = 1;
190 keyfields[1].startwords = 1;
191 keyfields[1].endwords = -1;
192 keyfields[1].endchars = -1;
193 /* The third keyfield (which is ignored while discarding duplicates)
194 compares the whole line */
195 keyfields[2].endwords = -1;
196 keyfields[2].endchars = -1;
197
198 decode_command (argc, argv);
199
200 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
201
202 /* Process input files completely, one by one. */
203
204 for (i = 0; i < num_infiles; i++)
205 {
206 int desc;
207 long ptr;
208 char *outfile;
209 char *p;
210
211 desc = open (infiles[i], 0, 0);
212 if (desc < 0) pfatal_with_name (infiles[i]);
213 lseek (desc, 0, L_XTND);
214 ptr = tell (desc);
215 close (desc);
216
217 outfile = outfiles[i];
218 if (!outfile)
219 {
220 outfile = concat (infiles[i], "s", "");
221 }
222
223 if (ptr < MAX_IN_CORE_SORT)
224 /* Sort a small amount of data */
225 sort_in_core (infiles[i], ptr, outfile);
226 else
227 sort_offline (infiles[i], ptr, outfile);
228 }
229
230 flush_tempfiles (tempcount);
231 exit (EXIT_SUCCESS);
232}
233\f
234/* This page decodes the command line arguments to set the parameter variables
235 and set up the vector of keyfields and the vector of input files */
236
237void
238decode_command (argc, argv)
239 int argc;
240 char **argv;
241{
242 int i;
243 char **ip;
244 char **op;
245
246 /* Store default values into parameter variables */
247
248#ifdef VMS
249 tempdir = "sys$scratch:";
250#else
251 tempdir = "/tmp/";
252#endif
253
254 keep_tempfiles = 0;
255
256 /* Allocate argc input files, which must be enough. */
257
258 infiles = (char **) xmalloc (argc * sizeof (char *));
259 outfiles = (char **) xmalloc (argc * sizeof (char *));
260 ip = infiles;
261 op = outfiles;
262
263 /* First find all switches that control the default kind-of-sort */
264
265 for (i = 1; i < argc; i++)
266 {
267 int tem = classify_arg (argv[i]);
268 char c;
269 char *p;
270
271 if (tem <= 0)
272 {
273 *ip++ = argv[i];
274 *op++ = 0;
275 continue;
276 }
277 if (tem > 1)
278 {
279 if (i + 1 == argc)
280 fatal ("switch %s given with no argument following it", argv[i]);
281 else if (!strcmp (argv[i], "-T"))
282 tempdir = argv[i + 1];
283 else if (!strcmp (argv[i], "-o"))
284 *(op - 1) = argv[i + 1];
285 i += tem - 1;
286 continue;
287 }
288
289 p = &argv[i][1];
290 while (c = *p++)
291 switch (c)
292 {
293 case 'k':
294 keep_tempfiles = 1;
295 break;
296
297 default:
298 fatal ("invalid command switch %c", c);
299 }
300 switchdone: ;
301 }
302
303 /* Record number of keyfields, terminate list of filenames */
304
305 num_infiles = ip - infiles;
306 *ip = 0;
307}
308
309/* Return 0 for an argument that is not a switch;
310 for a switch, return 1 plus the number of following arguments that the switch swallows.
311*/
312
313int
314classify_arg (arg)
315 char *arg;
316{
317 if (!strcmp (arg, "-T") || !strcmp (arg, "-o"))
318 return 2;
319 if (arg[0] == '-')
320 return 1;
321 return 0;
322}
323\f
324/* Create a name for a temporary file */
325
326char *
327maketempname (count)
328 int count;
329{
330 char tempsuffix[10];
331 sprintf (tempsuffix, "%d", count);
332 return concat (tempdir, tempbase, tempsuffix);
333}
334
335/* Delete all temporary files up to the specified count */
336
337void
338flush_tempfiles (to_count)
339 int to_count;
340{
341 if (keep_tempfiles) return;
342 while (last_deleted_tempcount < to_count)
343 unlink (maketempname (++last_deleted_tempcount));
344}
345
346/* Copy an input file into a temporary file, and return the temporary file name */
347
348#define BUFSIZE 1024
349
350char *
351tempcopy (idesc)
352 int idesc;
353{
354 char *outfile = maketempname (++tempcount);
355 int odesc;
356 char buffer[BUFSIZE];
357
358 odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
359
360 if (odesc < 0) pfatal_with_name (outfile);
361
362 while (1)
363 {
364 int nread = read (idesc, buffer, BUFSIZE);
365 write (odesc, buffer, nread);
366 if (!nread) break;
367 }
368
369 close (odesc);
370
371 return outfile;
372}
373\f
374/* Compare two lines, provided as pointers to pointers to text,
375 according to the specified set of keyfields */
376
377int
378compare_full (line1, line2)
379 char **line1, **line2;
380{
381 int i;
382
383 /* Compare using the first keyfield;
384 if that does not distinguish the lines, try the second keyfield; and so on. */
385
386 for (i = 0; i < num_keyfields; i++)
387 {
388 long length1, length2;
389 char *start1 = find_field (&keyfields[i], *line1, &length1);
390 char *start2 = find_field (&keyfields[i], *line2, &length2);
391 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
392 start2, length2, *line2 - text_base);
393 if (tem)
394 {
395 if (keyfields[i].reverse)
396 return - tem;
397 return tem;
398 }
399 }
400
401 return 0; /* Lines match exactly */
402}
403
404/* Compare two lines described by structures
405 in which the first keyfield is identified in advance.
406 For positional sorting, assumes that the order of the lines in core
407 reflects their nominal order. */
408
409int
410compare_prepared (line1, line2)
411 struct lineinfo *line1, *line2;
412{
413 int i;
414 int tem;
415 char *text1, *text2;
416
417 /* Compare using the first keyfield, which has been found for us already */
418 if (keyfields->positional)
419 {
420 if (line1->text - text_base > line2->text - text_base)
421 tem = 1;
422 else
423 tem = -1;
424 }
425 else if (keyfields->numeric)
426 tem = line1->key.number - line2->key.number;
427 else
428 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0);
429 if (tem)
430 {
431 if (keyfields->reverse)
432 return - tem;
433 return tem;
434 }
435
436 text1 = line1->text;
437 text2 = line2->text;
438
439 /* Compare using the second keyfield;
440 if that does not distinguish the lines, try the third keyfield; and so on. */
441
442 for (i = 1; i < num_keyfields; i++)
443 {
444 long length1, length2;
445 char *start1 = find_field (&keyfields[i], text1, &length1);
446 char *start2 = find_field (&keyfields[i], text2, &length2);
447 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
448 start2, length2, text2 - text_base);
449 if (tem)
450 {
451 if (keyfields[i].reverse)
452 return - tem;
453 return tem;
454 }
455 }
456
457 return 0; /* Lines match exactly */
458}
459
460/* Like compare_full but more general.
461 You can pass any strings, and you can say how many keyfields to use.
462 `pos1' and `pos2' should indicate the nominal positional ordering of
463 the two lines in the input. */
464
465int
466compare_general (str1, str2, pos1, pos2, use_keyfields)
467 char *str1, *str2;
468 long pos1, pos2;
469 int use_keyfields;
470{
471 int i;
472
473 /* Compare using the first keyfield;
474 if that does not distinguish the lines, try the second keyfield; and so on. */
475
476 for (i = 0; i < use_keyfields; i++)
477 {
478 long length1, length2;
479 char *start1 = find_field (&keyfields[i], str1, &length1);
480 char *start2 = find_field (&keyfields[i], str2, &length2);
481 int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2);
482 if (tem)
483 {
484 if (keyfields[i].reverse)
485 return - tem;
486 return tem;
487 }
488 }
489
490 return 0; /* Lines match exactly */
491}
492
493/* Find the start and length of a field in `str' according to `keyfield'.
494 A pointer to the starting character is returned, and the length
495 is stored into the int that `lengthptr' points to. */
496
497char *
498find_field (keyfield, str, lengthptr)
499 struct keyfield *keyfield;
500 char *str;
501 long *lengthptr;
502{
503 char *start;
504 char *end;
505 char *(*fun) ();
506
507 if (keyfield->braced) fun = find_braced_pos;
508 else fun = find_pos;
509
510 start = ( *fun )(str, keyfield->startwords, keyfield->startchars,
511 keyfield->ignore_blanks);
512 if (keyfield->endwords < 0)
513 {
514 if (keyfield->braced)
515 end = find_braced_end (start);
516 else
517 {
518 end = start;
519 while (*end && *end != '\n') end++;
520 }
521 }
522 else
523 {
524 end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0);
525 if (end - str < start - str) end = start;
526 }
527 *lengthptr = end - start;
528 return start;
529}
530
531/* Find a pointer to a specified place within `str',
532 skipping (from the beginning) `words' words and then `chars' chars.
533 If `ignore_blanks' is nonzero, we skip all blanks
534 after finding the specified word. */
535
536char *
537find_pos (str, words, chars, ignore_blanks)
538 char *str;
539 int words, chars;
540 int ignore_blanks;
541{
542 int i;
543 char *p = str;
544
545 for (i = 0; i < words; i++)
546 {
547 char c;
548 /* Find next bunch of nonblanks and skip them. */
549 while ((c = *p) == ' ' || c == '\t') p++;
550 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++;
551 if (!*p || *p == '\n') return p;
552 }
553
554 while (*p == ' ' || *p == '\t') p++;
555
556 for (i = 0; i < chars; i++)
557 {
558 if (!*p || *p == '\n') break;
559 p++;
560 }
561 return p;
562}
563
564/* Like find_pos but assumes that each field is surrounded by braces
565 and that braces within fields are balanced. */
566
567char *
568find_braced_pos (str, words, chars, ignore_blanks)
569 char *str;
570 int words, chars;
571 int ignore_blanks;
572{
573 int i;
574 int bracelevel;
575 char *p = str;
576 char c;
577
578 for (i = 0; i < words; i++)
579 {
580 bracelevel = 1;
581 while ((c = *p++) != '{' && c != '\n' && c);
582 if (c != '{')
583 return p - 1;
584 while (bracelevel)
585 {
586 c = *p++;
587 if (c == '{') bracelevel++;
588 if (c == '}') bracelevel--;
589#if 0
590 if (c == '\\' || c == '@') c = *p++; /* \ quotes braces and \ */
591#endif
592 if (c == 0 || c == '\n') return p-1;
593 }
594 }
595
596 while ((c = *p++) != '{' && c != '\n' && c);
597
598 if (c != '{')
599 return p-1;
600
601 if (ignore_blanks)
602 while ((c = *p) == ' ' || c == '\t') p++;
603
604 for (i = 0; i < chars; i++)
605 {
606 if (!*p || *p == '\n') break;
607 p++;
608 }
609 return p;
610}
611
612/* Find the end of the balanced-brace field which starts at `str'.
613 The position returned is just before the closing brace. */
614
615char *
616find_braced_end (str)
617 char *str;
618{
619 int bracelevel;
620 char *p = str;
621 char c;
622
623 bracelevel = 1;
624 while (bracelevel)
625 {
626 c = *p++;
627 if (c == '{') bracelevel++;
628 if (c == '}') bracelevel--;
629#if 0
630 if (c == '\\' || c == '@') c = *p++;
631#endif
632 if (c == 0 || c == '\n') return p-1;
633 }
634 return p - 1;
635}
636
637long
638find_value (start, length)
639 char *start;
640 long length;
641{
642 while (length != 0L) {
643 if (isdigit(*start))
644 return atol(start);
645 length--;
646 start++;
647 }
648 return 0l;
649}
650
651/* Vector used to translate characters for comparison.
652 This is how we make all alphanumerics follow all else,
653 and ignore case in the first sorting. */
654int char_order[256];
655
656init_char_order ()
657{
658 int i;
659 for (i = 1; i < 256; i++)
660 char_order[i] = i;
661
662 for (i = '0'; i <= '9'; i++)
663 char_order[i] += 512;
664
665 for (i = 'a'; i <= 'z'; i++) {
666 char_order[i] = 512 + i;
667 char_order[i + 'A' - 'a'] = 512 + i;
668 }
669}
670
671/* Compare two fields (each specified as a start pointer and a character count)
672 according to `keyfield'. The sign of the value reports the relation between the fields */
673
674int
675compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
676 struct keyfield *keyfield;
677 char *start1;
678 long length1;
679 long pos1;
680 char *start2;
681 long length2;
682 long pos2;
683{
684 if (keyfields->positional)
685 {
686 if (pos1 > pos2)
687 return 1;
688 else
689 return -1;
690 }
691 if (keyfield->numeric)
692 {
693 long value = find_value (start1, length1) - find_value (start2, length2);
694 if (value > 0) return 1;
695 if (value < 0) return -1;
696 return 0;
697 }
698 else
699 {
700 char *p1 = start1;
701 char *p2 = start2;
702 char *e1 = start1 + length1;
703 char *e2 = start2 + length2;
704
705 int fold_case = keyfield->fold_case;
706
707 while (1)
708 {
709 int c1, c2;
710
711 if (p1 == e1) c1 = 0;
712 else c1 = *p1++;
713 if (p2 == e2) c2 = 0;
714 else c2 = *p2++;
715
716 if (char_order[c1] != char_order[c2])
717 return char_order[c1] - char_order[c2];
718 if (!c1) break;
719 }
720
721 /* Strings are equal except possibly for case. */
722 p1 = start1;
723 p2 = start2;
724 while (1)
725 {
726 int c1, c2;
727
728 if (p1 == e1) c1 = 0;
729 else c1 = *p1++;
730 if (p2 == e2) c2 = 0;
731 else c2 = *p2++;
732
733 if (c1 != c2)
734 /* Reverse sign here so upper case comes out last. */
735 return c2 - c1;
736 if (!c1) break;
737 }
738
739 return 0;
740 }
741}
742\f
743/* A `struct linebuffer' is a structure which holds a line of text.
744 `readline' reads a line from a stream into a linebuffer
745 and works regardless of the length of the line. */
746
747struct linebuffer
748 {
749 long size;
750 char *buffer;
751 };
752
753/* Initialize a linebuffer for use */
754
755void
756initbuffer (linebuffer)
757 struct linebuffer *linebuffer;
758{
759 linebuffer->size = 200;
760 linebuffer->buffer = (char *) xmalloc (200);
761}
762
763/* Read a line of text from `stream' into `linebuffer'.
764 Return the length of the line. */
765
766long
767readline (linebuffer, stream)
768 struct linebuffer *linebuffer;
769 FILE *stream;
770{
771 char *buffer = linebuffer->buffer;
772 char *p = linebuffer->buffer;
773 char *end = p + linebuffer->size;
774
775 while (1)
776 {
777 int c = getc (stream);
778 if (p == end)
779 {
780 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
781 p += buffer - linebuffer->buffer;
782 end += buffer - linebuffer->buffer;
783 linebuffer->buffer = buffer;
784 }
785 if (c < 0 || c == '\n')
786 {
787 *p = 0;
788 break;
789 }
790 *p++ = c;
791 }
792
793 return p - buffer;
794}
795\f
796/* Sort an input file too big to sort in core. */
797
798void
799sort_offline (infile, nfiles, total, outfile)
800 char *infile;
801 long total;
802 char *outfile;
803{
804 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; /* More than enough */
805 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
806 FILE *istream = fopen (infile, "r");
807 int i;
808 struct linebuffer lb;
809 long linelength;
810 int failure = 0;
811
812 initbuffer (&lb);
813
814 /* Read in one line of input data. */
815
816 linelength = readline (&lb, istream);
817
818 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
819 {
820 error ("%s: not a texinfo index file", infile);
821 return;
822 }
823
824 /* Split up the input into `ntemps' temporary files, or maybe fewer,
825 and put the new files' names into `tempfiles' */
826
827 for (i = 0; i < ntemps; i++)
828 {
829 char *outname = maketempname (++tempcount);
830 FILE *ostream = fopen (outname, "w");
831 long tempsize = 0;
832
833 if (!ostream) pfatal_with_name (outname);
834 tempfiles[i] = outname;
835
836 /* Copy lines into this temp file as long as it does not make file "too big"
837 or until there are no more lines. */
838
839 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
840 {
841 tempsize += linelength + 1;
842 fputs (lb.buffer, ostream);
843 putc ('\n', ostream);
844
845 /* Read another line of input data. */
846
847 linelength = readline (&lb, istream);
848 if (!linelength && feof (istream)) break;
849
850 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
851 {
852 error ("%s: not a texinfo index file", infile);
853 failure = 1;
854 goto fail;
855 }
856 }
857 fclose (ostream);
858 if (feof (istream)) break;
859 }
860
861 free (lb.buffer);
862
863 fail:
864 /* Record number of temp files we actually needed. */
865
866 ntemps = i;
867
868 /* Sort each tempfile into another tempfile.
869 Delete the first set of tempfiles and put the names of the second into `tempfiles' */
870
871 for (i = 0; i < ntemps; i++)
872 {
873 char *newtemp = maketempname (++tempcount);
874 sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
875 if (!keep_tempfiles)
876 unlink (tempfiles[i]);
877 tempfiles[i] = newtemp;
878 }
879
880 if (failure)
881 return;
882
883 /* Merge the tempfiles together and indexify */
884
885 merge_files (tempfiles, ntemps, outfile);
886}
887\f
888/* Sort `infile', whose size is `total',
889 assuming that is small enough to be done in-core,
890 then indexify it and send the output to `outfile' (or to stdout). */
891
892void
893sort_in_core (infile, total, outfile)
894 char *infile;
895 long total;
896 char *outfile;
897{
898 char **nextline;
899 char *data = (char *) xmalloc (total + 1);
900 char *file_data;
901 long file_size;
902 int i;
903 FILE *ostream = stdout;
904 struct lineinfo *lineinfo;
905
906 /* Read the contents of the file into the moby array `data' */
907
908 int desc = open (infile, 0, 0);
909
910 if (desc < 0)
911 fatal ("failure reopening %s", infile);
912 for (file_size = 0; ; )
913 {
914 if ((i = read (desc, data + file_size, total - file_size)) <= 0)
915 break;
916 file_size += i;
917 }
918 file_data = data;
919 data[file_size] = 0;
920
921 close (desc);
922
923 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
924 {
925 error ("%s: not a texinfo index file", infile);
926 return;
927 }
928
929 init_char_order ();
930
931 /* Sort routines want to know this address */
932
933 text_base = data;
934
935 /* Create the array of pointers to lines, with a default size frequently enough. */
936
937 nlines = total / 50;
938 if (!nlines) nlines = 2;
939 linearray = (char **) xmalloc (nlines * sizeof (char *));
940
941 /* `nextline' points to the next free slot in this array.
942 `nlines' is the allocated size. */
943
944 nextline = linearray;
945
946 /* Parse the input file's data, and make entries for the lines. */
947
948 nextline = parsefile (infile, nextline, file_data, file_size);
949 if (nextline == 0)
950 {
951 error ("%s: not a texinfo index file", infile);
952 return;
953 }
954
955 /* Sort the lines */
956
957 /* If we have enough space, find the first keyfield of each line in advance.
958 Make a `struct lineinfo' for each line, which records the keyfield
959 as well as the line, and sort them. */
960
961 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
962
963 if (lineinfo)
964 {
965 struct lineinfo *lp;
966 char **p;
967
968 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
969 {
970 lp->text = *p;
971 lp->key.text = find_field (keyfields, *p, &lp->keylen);
972 if (keyfields->numeric)
973 lp->key.number = find_value (lp->key.text, lp->keylen);
974 }
975
976 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
977
978 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
979 *p = lp->text;
980
981 free (lineinfo);
982 }
983 else
984 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
985
986 /* Open the output file */
987
988 if (outfile)
989 {
990 ostream = fopen (outfile, "w");
991 if (!ostream)
992 pfatal_with_name (outfile);
993 }
994
995 writelines (linearray, nextline - linearray, ostream);
996 if (outfile) fclose (ostream);
997
998 free (linearray);
999 free (data);
1000}
1001\f
1002/* Parse an input string in core into lines.
1003 DATA is the input string, and SIZE is its length.
1004 Data goes in LINEARRAY starting at NEXTLINE.
1005 The value returned is the first entry in LINEARRAY still unused.
1006 Value 0 means input file contents are invalid. */
1007
1008char **
1009parsefile (filename, nextline, data, size)
1010 char *filename;
1011 char **nextline;
1012 char *data;
1013 long size;
1014{
1015 char *p, *end;
1016 char **line = nextline;
1017
1018 p = data;
1019 end = p + size;
1020 *end = 0;
1021
1022 while (p != end)
1023 {
1024 if (p[0] != '\\' && p[0] != '@')
1025 return 0;
1026
1027 *line = p;
1028 while (*p && *p != '\n') p++;
1029 if (p != end) p++;
1030
1031 line++;
1032 if (line == linearray + nlines)
1033 {
1034 char **old = linearray;
1035 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1036 line += linearray - old;
1037 }
1038 }
1039
1040 return line;
1041}
1042\f
1043/* Indexification is a filter applied to the sorted lines
1044 as they are being written to the output file.
1045 Multiple entries for the same name, with different page numbers,
1046 get combined into a single entry with multiple page numbers.
1047 The first braced field, which is used for sorting, is discarded.
1048 However, its first character is examined, folded to lower case,
1049 and if it is different from that in the previous line fed to us
1050 a \initial line is written with one argument, the new initial.
1051
1052 If an entry has four braced fields, then the second and third
1053 constitute primary and secondary names.
1054 In this case, each change of primary name
1055 generates a \primary line which contains only the primary name,
1056 and in between these are \secondary lines which contain
1057 just a secondary name and page numbers.
1058*/
1059
1060/* The last primary name we wrote a \primary entry for.
1061 If only one level of indexing is being done, this is the last name seen */
1062char *lastprimary;
1063int lastprimarylength; /* Length of storage allocated for lastprimary */
1064
1065/* Similar, for the secondary name. */
1066char *lastsecondary;
1067int lastsecondarylength;
1068
1069/* Zero if we are not in the middle of writing an entry.
1070 One if we have written the beginning of an entry but have not
1071 yet written any page numbers into it.
1072 Greater than one if we have written the beginning of an entry
1073 plus at least one page number. */
1074int pending;
1075
1076/* The initial (for sorting purposes) of the last primary entry written.
1077 When this changes, a \initial {c} line is written */
1078
1079char * lastinitial;
1080
1081int lastinitiallength;
1082
1083/* When we need a string of length 1 for the value of lastinitial,
1084 store it here. */
1085
1086char lastinitial1[2];
1087
1088/* Initialize static storage for writing an index */
1089
1090void
1091init_index ()
1092{
1093 pending = 0;
1094 lastinitial = lastinitial1;
1095 lastinitial1[0] = 0;
1096 lastinitial1[1] = 0;
1097 lastinitiallength = 0;
1098 lastprimarylength = 100;
1099 lastprimary = (char *) xmalloc (lastprimarylength + 1);
1100 bzero (lastprimary, lastprimarylength + 1);
1101 lastsecondarylength = 100;
1102 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1103 bzero (lastsecondary, lastsecondarylength + 1);
1104}
1105
1106/* Indexify. Merge entries for the same name,
1107 insert headers for each initial character, etc. */
1108
1109indexify (line, ostream)
1110 char *line;
1111 FILE *ostream;
1112{
1113 char *primary, *secondary, *pagenumber;
1114 int primarylength, secondarylength, pagelength;
1115 int len = strlen (line);
1116 int nosecondary;
1117 int initiallength;
1118 char *initial;
1119 char initial1[2];
1120 register char *p;
1121
1122 /* First, analyze the parts of the entry fed to us this time */
1123
1124 p = find_braced_pos (line, 0, 0, 0);
1125 if (*p == '{')
1126 {
1127 initial = p;
1128 /* Get length of inner pair of braces starting at p,
1129 including that inner pair of braces. */
1130 initiallength = find_braced_end (p + 1) + 1 - p;
1131 }
1132 else
1133 {
1134 initial = initial1;
1135 initial1[0] = *p;
1136 initial1[1] = 0;
1137 initiallength = 1;
1138
1139 if (initial1[0] >= 'a' && initial1[0] <= 'z')
1140 initial1[0] -= 040;
1141 }
1142
1143 pagenumber = find_braced_pos (line, 1, 0, 0);
1144 pagelength = find_braced_end (pagenumber) - pagenumber;
1145 if (pagelength == 0)
1146 abort ();
1147
1148 primary = find_braced_pos (line, 2, 0, 0);
1149 primarylength = find_braced_end (primary) - primary;
1150
1151 secondary = find_braced_pos (line, 3, 0, 0);
1152 nosecondary = !*secondary;
1153 if (!nosecondary)
1154 secondarylength = find_braced_end (secondary) - secondary;
1155
1156 /* If the primary is different from before, make a new primary entry */
1157 if (strncmp (primary, lastprimary, primarylength))
1158 {
1159 /* Close off current secondary entry first, if one is open */
1160 if (pending)
1161 {
1162 fputs ("}\n", ostream);
1163 pending = 0;
1164 }
1165
1166 /* If this primary has a different initial, include an entry for the initial */
1167 if (initiallength != lastinitiallength ||
1168 strncmp (initial, lastinitial, initiallength))
1169 {
1170 fprintf (ostream, "\\initial {");
1171 fwrite (initial, 1, initiallength, ostream);
1172 fprintf (ostream, "}\n", initial);
1173 if (initial == initial1)
1174 {
1175 lastinitial = lastinitial1;
1176 *lastinitial1 = *initial1;
1177 }
1178 else
1179 {
1180 lastinitial = initial;
1181 }
1182 lastinitiallength = initiallength;
1183 }
1184
1185 /* Make the entry for the primary. */
1186 if (nosecondary)
1187 fputs ("\\entry {", ostream);
1188 else
1189 fputs ("\\primary {", ostream);
1190 fwrite (primary, primarylength, 1, ostream);
1191 if (nosecondary)
1192 {
1193 fputs ("}{", ostream);
1194 pending = 1;
1195 }
1196 else
1197 fputs ("}\n", ostream);
1198
1199 /* Record name of most recent primary */
1200 if (lastprimarylength < primarylength)
1201 {
1202 lastprimarylength = primarylength + 100;
1203 lastprimary = (char *) xrealloc (lastprimary,
1204 1 + lastprimarylength);
1205 }
1206 strncpy (lastprimary, primary, primarylength);
1207 lastprimary[primarylength] = 0;
1208
1209 /* There is no current secondary within this primary, now */
1210 lastsecondary[0] = 0;
1211 }
1212
1213 /* Should not have an entry with no subtopic following one with a subtopic */
1214
1215 if (nosecondary && *lastsecondary)
1216 error ("entry %s follows an entry with a secondary name", line);
1217
1218 /* Start a new secondary entry if necessary */
1219 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1220 {
1221 if (pending)
1222 {
1223 fputs ("}\n", ostream);
1224 pending = 0;
1225 }
1226
1227 /* Write the entry for the secondary. */
1228 fputs ("\\secondary {", ostream);
1229 fwrite (secondary, secondarylength, 1, ostream);
1230 fputs ("}{", ostream);
1231 pending = 1;
1232
1233 /* Record name of most recent secondary */
1234 if (lastsecondarylength < secondarylength)
1235 {
1236 lastsecondarylength = secondarylength + 100;
1237 lastsecondary = (char *) xrealloc (lastsecondary,
1238 1 + lastsecondarylength);
1239 }
1240 strncpy (lastsecondary, secondary, secondarylength);
1241 lastsecondary[secondarylength] = 0;
1242 }
1243
1244 /* Here to add one more page number to the current entry */
1245 if (pending++ != 1)
1246 fputs (", ", ostream); /* Punctuate first, if this is not the first */
1247 fwrite (pagenumber, pagelength, 1, ostream);
1248}
1249
1250/* Close out any unfinished output entry */
1251
1252void
1253finish_index (ostream)
1254 FILE *ostream;
1255{
1256 if (pending)
1257 fputs ("}\n", ostream);
1258 free (lastprimary);
1259 free (lastsecondary);
1260}
1261\f
1262/* Copy the lines in the sorted order.
1263 Each line is copied out of the input file it was found in. */
1264
1265void
1266writelines (linearray, nlines, ostream)
1267 char **linearray;
1268 int nlines;
1269 FILE *ostream;
1270{
1271 char **stop_line = linearray + nlines;
1272 char **next_line;
1273
1274 init_index ();
1275
1276 /* Output the text of the lines, and free the buffer space */
1277
1278 for (next_line = linearray; next_line != stop_line; next_line++)
1279 {
1280 /* If -u was specified, output the line only if distinct from previous one. */
1281 if (next_line == linearray
1282 /* Compare previous line with this one, using only the explicitly specd keyfields */
1283 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
1284 {
1285 char *p = *next_line;
1286 char c;
1287 while ((c = *p++) && c != '\n');
1288 *(p-1) = 0;
1289 indexify (*next_line, ostream);
1290 }
1291 }
1292
1293 finish_index (ostream);
1294}
1295\f
1296/* Assume (and optionally verify) that each input file is sorted;
1297 merge them and output the result.
1298 Returns nonzero if any input file fails to be sorted.
1299
1300 This is the high-level interface that can handle an unlimited number of files. */
1301
1302#define MAX_DIRECT_MERGE 10
1303
1304int
1305merge_files (infiles, nfiles, outfile)
1306 char **infiles;
1307 int nfiles;
1308 char *outfile;
1309{
1310 char **tempfiles;
1311 int ntemps;
1312 int i;
1313 int value = 0;
1314 int start_tempcount = tempcount;
1315
1316 if (nfiles <= MAX_DIRECT_MERGE)
1317 return merge_direct (infiles, nfiles, outfile);
1318
1319 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1320 making a temporary file to hold each group's result. */
1321
1322 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1323 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1324 for (i = 0; i < ntemps; i++)
1325 {
1326 int nf = MAX_DIRECT_MERGE;
1327 if (i + 1 == ntemps)
1328 nf = nfiles - i * MAX_DIRECT_MERGE;
1329 tempfiles[i] = maketempname (++tempcount);
1330 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1331 }
1332
1333 /* All temporary files that existed before are no longer needed
1334 since their contents have been merged into our new tempfiles.
1335 So delete them. */
1336 flush_tempfiles (start_tempcount);
1337
1338 /* Now merge the temporary files we created. */
1339
1340 merge_files (tempfiles, ntemps, outfile);
1341
1342 free (tempfiles);
1343
1344 return value;
1345}
1346\f
1347/* Assume (and optionally verify) that each input file is sorted;
1348 merge them and output the result.
1349 Returns nonzero if any input file fails to be sorted.
1350
1351 This version of merging will not work if the number of
1352 input files gets too high. Higher level functions
1353 use it only with a bounded number of input files. */
1354
1355int
1356merge_direct (infiles, nfiles, outfile)
1357 char **infiles;
1358 int nfiles;
1359 char *outfile;
1360{
1361 char **ip = infiles;
1362 struct linebuffer *lb1, *lb2;
1363 struct linebuffer **thisline, **prevline;
1364 FILE **streams;
1365 int i;
1366 int nleft;
1367 int lossage = 0;
1368 int *file_lossage;
1369 struct linebuffer *prev_out = 0;
1370 FILE *ostream = stdout;
1371
1372 if (outfile)
1373 {
1374 ostream = fopen (outfile, "w");
1375 }
1376 if (!ostream) pfatal_with_name (outfile);
1377
1378 init_index ();
1379
1380 if (nfiles == 0)
1381 {
1382 if (outfile)
1383 fclose (ostream);
1384 return 0;
1385 }
1386
1387 /* For each file, make two line buffers.
1388 Also, for each file, there is an element of `thisline'
1389 which points at any time to one of the file's two buffers,
1390 and an element of `prevline' which points to the other buffer.
1391 `thisline' is supposed to point to the next available line from the file,
1392 while `prevline' holds the last file line used,
1393 which is remembered so that we can verify that the file is properly sorted. */
1394
1395 /* lb1 and lb2 contain one buffer each per file */
1396 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1397 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1398
1399 /* thisline[i] points to the linebuffer holding the next available line in file i,
1400 or is zero if there are no lines left in that file. */
1401 thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
1402 /* prevline[i] points to the linebuffer holding the last used line from file i.
1403 This is just for verifying that file i is properly sorted. */
1404 prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
1405 /* streams[i] holds the input stream for file i. */
1406 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1407 /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */
1408 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1409
1410 /* Allocate and initialize all that storage */
1411
1412 for (i = 0; i < nfiles; i++)
1413 {
1414 initbuffer (&lb1[i]);
1415 initbuffer (&lb2[i]);
1416 thisline[i] = &lb1[i];
1417 prevline[i] = &lb2[i];
1418 file_lossage[i] = 0;
1419 streams[i] = fopen (infiles[i], "r");
1420 if (!streams[i])
1421 pfatal_with_name (infiles[i]);
1422
1423 readline (thisline[i], streams[i]);
1424 }
1425
1426 /* Keep count of number of files not at eof */
1427 nleft = nfiles;
1428
1429 while (nleft)
1430 {
1431 struct linebuffer *best = 0;
1432 struct linebuffer *exch;
1433 int bestfile = -1;
1434 int i;
1435
1436 /* Look at the next avail line of each file; choose the least one. */
1437
1438 for (i = 0; i < nfiles; i++)
1439 {
1440 if (thisline[i] &&
1441 (!best ||
1442 0 < compare_general (best->buffer, thisline[i]->buffer,
1443 (long) bestfile, (long) i, num_keyfields)))
1444 {
1445 best = thisline[i];
1446 bestfile = i;
1447 }
1448 }
1449
1450 /* Output that line, unless it matches the previous one and we don't want duplicates */
1451
1452 if (!(prev_out &&
1453 !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1)))
1454 indexify (best->buffer, ostream);
1455 prev_out = best;
1456
1457 /* Now make the line the previous of its file, and fetch a new line from that file */
1458
1459 exch = prevline[bestfile];
1460 prevline[bestfile] = thisline[bestfile];
1461 thisline[bestfile] = exch;
1462
1463 while (1)
1464 {
1465 /* If the file has no more, mark it empty */
1466
1467 if (feof (streams[bestfile]))
1468 {
1469 thisline[bestfile] = 0;
1470 nleft--; /* Update the number of files still not empty */
1471 break;
1472 }
1473 readline (thisline[bestfile], streams[bestfile]);
1474 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break;
1475 }
1476 }
1477
1478 finish_index (ostream);
1479
1480 /* Free all storage and close all input streams */
1481
1482 for (i = 0; i < nfiles; i++)
1483 {
1484 fclose (streams[i]);
1485 free (lb1[i].buffer);
1486 free (lb2[i].buffer);
1487 }
1488 free (file_lossage);
1489 free (lb1);
1490 free (lb2);
1491 free (thisline);
1492 free (prevline);
1493 free (streams);
1494
1495 if (outfile)
1496 fclose (ostream);
1497
1498 return lossage;
1499}
1500\f
1501/* Print error message and exit. */
1502
1503fatal (s1, s2)
1504 char *s1, *s2;
1505{
1506 error (s1, s2);
1507 exit (EXIT_FATAL);
1508}
1509
1510/* Print error message. `s1' is printf control string, `s2' is arg for it. */
1511
1512error (s1, s2)
1513 char *s1, *s2;
1514{
1515 printf ("texindex: ");
1516 printf (s1, s2);
1517 printf ("\n");
1518}
1519
1520perror_with_name (name)
1521 char *name;
1522{
1523 char *s;
1524
1525 if (errno < sys_nerr)
1526 s = concat ("", sys_errlist[errno], " for %s");
1527 else
1528 s = "cannot open %s";
1529 error (s, name);
1530}
1531
1532pfatal_with_name (name)
1533 char *name;
1534{
1535 char *s;
1536
1537 if (errno < sys_nerr)
1538 s = concat ("", sys_errlist[errno], " for %s");
1539 else
1540 s = "cannot open %s";
1541 fatal (s, name);
1542}
1543
1544/* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */
1545
1546char *
1547concat (s1, s2, s3)
1548 char *s1, *s2, *s3;
1549{
1550 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1551 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1552
1553 strcpy (result, s1);
1554 strcpy (result + len1, s2);
1555 strcpy (result + len1 + len2, s3);
1556 *(result + len1 + len2 + len3) = 0;
1557
1558 return result;
1559}
1560
1561/* Like malloc but get fatal error if memory is exhausted. */
1562
1563int
1564xmalloc (size)
1565 int size;
1566{
1567 int result = malloc (size);
1568 if (!result)
1569 fatal ("virtual memory exhausted", 0);
1570 return result;
1571}
1572
1573
1574int
1575xrealloc (ptr, size)
1576 char *ptr;
1577 int size;
1578{
1579 int result = realloc (ptr, size);
1580 if (!result)
1581 fatal ("virtual memory exhausted");
1582 return result;
1583}
1584
1585bzero (b, length)
1586 register char *b;
1587 register int length;
1588{
1589#ifdef VMS
1590 short zero = 0;
1591 long max_str = 65535;
1592 long len;
1593
1594 while (length > max_str)
1595 {
1596 (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b);
1597 length -= max_str;
1598 b += max_str;
1599 }
1600 len = length;
1601 (void) LIB$MOVC5 (&zero, &zero, &zero, &len, b);
1602#else
1603 while (length-- > 0)
1604 *b++ = 0;
1605#endif /* not VMS */
1606}