]> git.ipfire.org Git - thirdparty/squid.git/blame - lib/GNUregex.c
updating to squid-1.0.1
[thirdparty/squid.git] / lib / GNUregex.c
CommitLineData
090089c4 1#include "autoconf.h" /* get the #defines from GNU autoconf */
2/* Extended regular expression matching and search library,
3 version 0.12.
4 (Implements POSIX draft P10003.2/D11.2, except for
5 internationalization features.)
6
7 Copyright (C) 1993 Free Software Foundation, Inc.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
22
23/* AIX requires this to be the first thing in the file. */
24#if defined (_AIX) && !defined (REGEX_MALLOC)
25 #pragma alloca
26#endif
27
28#define _GNU_SOURCE
29
30/* We need this for `regex.h', and perhaps for the Emacs include files. */
31#include <sys/types.h>
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
1cf7b8fa 37#if !HAVE_ALLOCA
38#define REGEX_MALLOC 1
39#endif
40
090089c4 41/* The `emacs' switch turns on certain matching commands
42 that make sense only in Emacs. */
43#ifdef emacs
44
45#include "lisp.h"
46#include "buffer.h"
47#include "syntax.h"
48
49/* Emacs uses `NULL' as a predicate. */
50#undef NULL
51
52#else /* not emacs */
53
54/* We used to test for `BSTRING' here, but only GCC and Emacs define
55 `BSTRING', as far as I know, and neither of them use this code. */
56#if HAVE_STRING_H || STDC_HEADERS
57#include <string.h>
58#ifndef bcmp
59#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
60#endif
61#ifndef bcopy
62#define bcopy(s, d, n) memcpy ((d), (s), (n))
63#endif
64#ifndef bzero
65#define bzero(s, n) memset ((s), 0, (n))
66#endif
67#else
68#include <strings.h>
69#endif
70
71#ifdef STDC_HEADERS
72#include <stdlib.h>
73#else
74char *malloc ();
75char *realloc ();
76#endif
77
78
79/* Define the syntax stuff for \<, \>, etc. */
80
81/* This must be nonzero for the wordchar and notwordchar pattern
82 commands in re_match_2. */
83#ifndef Sword
84#define Sword 1
85#endif
86
87#ifdef SYNTAX_TABLE
88
89extern char *re_syntax_table;
90
91#else /* not SYNTAX_TABLE */
92
93/* How many characters in the character set. */
94#define CHAR_SET_SIZE 256
95
96static char re_syntax_table[CHAR_SET_SIZE];
97
98static void
99init_syntax_once ()
100{
101 register int c;
102 static int done = 0;
103
104 if (done)
105 return;
106
107 bzero (re_syntax_table, sizeof re_syntax_table);
108
109 for (c = 'a'; c <= 'z'; c++)
110 re_syntax_table[c] = Sword;
111
112 for (c = 'A'; c <= 'Z'; c++)
113 re_syntax_table[c] = Sword;
114
115 for (c = '0'; c <= '9'; c++)
116 re_syntax_table[c] = Sword;
117
118 re_syntax_table['_'] = Sword;
119
120 done = 1;
121}
122
123#endif /* not SYNTAX_TABLE */
124
125#define SYNTAX(c) re_syntax_table[c]
126
127#endif /* not emacs */
128\f
129/* Get the interface, including the syntax bits. */
130#include "GNUregex.h"
131
132/* isalpha etc. are used for the character classes. */
133#include <ctype.h>
134
135#ifndef isascii
136#define isascii(c) 1
137#endif
138
139#ifdef isblank
140#define ISBLANK(c) (isascii (c) && isblank (c))
141#else
142#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
143#endif
144#ifdef isgraph
145#define ISGRAPH(c) (isascii (c) && isgraph (c))
146#else
147#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
148#endif
149
150#define ISPRINT(c) (isascii (c) && isprint (c))
151#define ISDIGIT(c) (isascii (c) && isdigit (c))
152#define ISALNUM(c) (isascii (c) && isalnum (c))
153#define ISALPHA(c) (isascii (c) && isalpha (c))
154#define ISCNTRL(c) (isascii (c) && iscntrl (c))
155#define ISLOWER(c) (isascii (c) && islower (c))
156#define ISPUNCT(c) (isascii (c) && ispunct (c))
157#define ISSPACE(c) (isascii (c) && isspace (c))
158#define ISUPPER(c) (isascii (c) && isupper (c))
159#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
160
161#ifndef NULL
162#define NULL 0
163#endif
164
165/* We remove any previous definition of `SIGN_EXTEND_CHAR',
166 since ours (we hope) works properly with all combinations of
167 machines, compilers, `char' and `unsigned char' argument types.
168 (Per Bothner suggested the basic approach.) */
169#undef SIGN_EXTEND_CHAR
170#if __STDC__
171#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
172#else /* not __STDC__ */
173/* As in Harbison and Steele. */
174#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
175#endif
176\f
177/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
178 use `alloca' instead of `malloc'. This is because using malloc in
179 re_search* or re_match* could cause memory leaks when C-g is used in
180 Emacs; also, malloc is slower and causes storage fragmentation. On
181 the other hand, malloc is more portable, and easier to debug.
182
183 Because we sometimes use alloca, some routines have to be macros,
184 not functions -- `alloca'-allocated space disappears at the end of the
185 function it is called in. */
186
187#ifdef REGEX_MALLOC
188
189#define REGEX_ALLOCATE malloc
190#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
191
192#else /* not REGEX_MALLOC */
193
194/* Emacs already defines alloca, sometimes. */
195#ifndef alloca
196
197/* Make alloca work the best possible way. */
198#ifdef __GNUC__
199#define alloca __builtin_alloca
200#else /* not __GNUC__ */
201#if HAVE_ALLOCA_H
202#include <alloca.h>
203#else /* not __GNUC__ or HAVE_ALLOCA_H */
204#ifndef _AIX /* Already did AIX, up at the top. */
205char *alloca ();
206#endif /* not _AIX */
207#endif /* not HAVE_ALLOCA_H */
208#endif /* not __GNUC__ */
209
210#endif /* not alloca */
211
212#define REGEX_ALLOCATE alloca
213
214/* Assumes a `char *destination' variable. */
215#define REGEX_REALLOCATE(source, osize, nsize) \
216 (destination = (char *) alloca (nsize), \
217 bcopy (source, destination, osize), \
218 destination)
219
220#endif /* not REGEX_MALLOC */
221
222
223/* True if `size1' is non-NULL and PTR is pointing anywhere inside
224 `string1' or just past its end. This works if PTR is NULL, which is
225 a good thing. */
226#define FIRST_STRING_P(ptr) \
227 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
228
229/* (Re)Allocate N items of type T using malloc, or fail. */
230#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
231#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
232#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
233
234#define BYTEWIDTH 8 /* In bits. */
235
236#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
237
238#define MAX(a, b) ((a) > (b) ? (a) : (b))
239#define MIN(a, b) ((a) < (b) ? (a) : (b))
240
241typedef char boolean;
242#define false 0
243#define true 1
244\f
245/* These are the command codes that appear in compiled regular
246 expressions. Some opcodes are followed by argument bytes. A
247 command code can specify any interpretation whatsoever for its
248 arguments. Zero bytes may appear in the compiled regular expression.
249
250 The value of `exactn' is needed in search.c (search_buffer) in Emacs.
251 So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
252 `exactn' we use here must also be 1. */
253
254typedef enum
255{
256 no_op = 0,
257
258 /* Followed by one byte giving n, then by n literal bytes. */
259 exactn = 1,
260
261 /* Matches any (more or less) character. */
262 anychar,
263
264 /* Matches any one char belonging to specified set. First
265 following byte is number of bitmap bytes. Then come bytes
266 for a bitmap saying which chars are in. Bits in each byte
267 are ordered low-bit-first. A character is in the set if its
268 bit is 1. A character too large to have a bit in the map is
269 automatically not in the set. */
270 charset,
271
272 /* Same parameters as charset, but match any character that is
273 not one of those specified. */
274 charset_not,
275
276 /* Start remembering the text that is matched, for storing in a
277 register. Followed by one byte with the register number, in
278 the range 0 to one less than the pattern buffer's re_nsub
279 field. Then followed by one byte with the number of groups
280 inner to this one. (This last has to be part of the
281 start_memory only because we need it in the on_failure_jump
282 of re_match_2.) */
283 start_memory,
284
285 /* Stop remembering the text that is matched and store it in a
286 memory register. Followed by one byte with the register
287 number, in the range 0 to one less than `re_nsub' in the
288 pattern buffer, and one byte with the number of inner groups,
289 just like `start_memory'. (We need the number of inner
290 groups here because we don't have any easy way of finding the
291 corresponding start_memory when we're at a stop_memory.) */
292 stop_memory,
293
294 /* Match a duplicate of something remembered. Followed by one
295 byte containing the register number. */
296 duplicate,
297
298 /* Fail unless at beginning of line. */
299 begline,
300
301 /* Fail unless at end of line. */
302 endline,
303
304 /* Succeeds if at beginning of buffer (if emacs) or at beginning
305 of string to be matched (if not). */
306 begbuf,
307
308 /* Analogously, for end of buffer/string. */
309 endbuf,
310
311 /* Followed by two byte relative address to which to jump. */
312 jump,
313
314 /* Same as jump, but marks the end of an alternative. */
315 jump_past_alt,
316
317 /* Followed by two-byte relative address of place to resume at
318 in case of failure. */
319 on_failure_jump,
320
321 /* Like on_failure_jump, but pushes a placeholder instead of the
322 current string position when executed. */
323 on_failure_keep_string_jump,
324
325 /* Throw away latest failure point and then jump to following
326 two-byte relative address. */
327 pop_failure_jump,
328
329 /* Change to pop_failure_jump if know won't have to backtrack to
330 match; otherwise change to jump. This is used to jump
331 back to the beginning of a repeat. If what follows this jump
332 clearly won't match what the repeat does, such that we can be
333 sure that there is no use backtracking out of repetitions
334 already matched, then we change it to a pop_failure_jump.
335 Followed by two-byte address. */
336 maybe_pop_jump,
337
338 /* Jump to following two-byte address, and push a dummy failure
339 point. This failure point will be thrown away if an attempt
340 is made to use it for a failure. A `+' construct makes this
341 before the first repeat. Also used as an intermediary kind
342 of jump when compiling an alternative. */
343 dummy_failure_jump,
344
345 /* Push a dummy failure point and continue. Used at the end of
346 alternatives. */
347 push_dummy_failure,
348
349 /* Followed by two-byte relative address and two-byte number n.
350 After matching N times, jump to the address upon failure. */
351 succeed_n,
352
353 /* Followed by two-byte relative address, and two-byte number n.
354 Jump to the address N times, then fail. */
355 jump_n,
356
357 /* Set the following two-byte relative address to the
358 subsequent two-byte number. The address *includes* the two
359 bytes of number. */
360 set_number_at,
361
362 wordchar, /* Matches any word-constituent character. */
363 notwordchar, /* Matches any char that is not a word-constituent. */
364
365 wordbeg, /* Succeeds if at word beginning. */
366 wordend, /* Succeeds if at word end. */
367
368 wordbound, /* Succeeds if at a word boundary. */
369 notwordbound /* Succeeds if not at a word boundary. */
370
371#ifdef emacs
372 ,before_dot, /* Succeeds if before point. */
373 at_dot, /* Succeeds if at point. */
374 after_dot, /* Succeeds if after point. */
375
376 /* Matches any character whose syntax is specified. Followed by
377 a byte which contains a syntax code, e.g., Sword. */
378 syntaxspec,
379
380 /* Matches any character whose syntax is not that specified. */
381 notsyntaxspec
382#endif /* emacs */
383} re_opcode_t;
384\f
385/* Common operations on the compiled pattern. */
386
387/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
388
389#define STORE_NUMBER(destination, number) \
390 do { \
391 (destination)[0] = (number) & 0377; \
392 (destination)[1] = (number) >> 8; \
393 } while (0)
394
395/* Same as STORE_NUMBER, except increment DESTINATION to
396 the byte after where the number is stored. Therefore, DESTINATION
397 must be an lvalue. */
398
399#define STORE_NUMBER_AND_INCR(destination, number) \
400 do { \
401 STORE_NUMBER (destination, number); \
402 (destination) += 2; \
403 } while (0)
404
405/* Put into DESTINATION a number stored in two contiguous bytes starting
406 at SOURCE. */
407
408#define EXTRACT_NUMBER(destination, source) \
409 do { \
410 (destination) = *(source) & 0377; \
411 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
412 } while (0)
413
414#ifdef DEBUG
415static void
416extract_number (dest, source)
417 int *dest;
418 unsigned char *source;
419{
420 int temp = SIGN_EXTEND_CHAR (*(source + 1));
421 *dest = *source & 0377;
422 *dest += temp << 8;
423}
424
425#ifndef EXTRACT_MACROS /* To debug the macros. */
426#undef EXTRACT_NUMBER
427#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
428#endif /* not EXTRACT_MACROS */
429
430#endif /* DEBUG */
431
432/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
433 SOURCE must be an lvalue. */
434
435#define EXTRACT_NUMBER_AND_INCR(destination, source) \
436 do { \
437 EXTRACT_NUMBER (destination, source); \
438 (source) += 2; \
439 } while (0)
440
441#ifdef DEBUG
442static void
443extract_number_and_incr (destination, source)
444 int *destination;
445 unsigned char **source;
446{
447 extract_number (destination, *source);
448 *source += 2;
449}
450
451#ifndef EXTRACT_MACROS
452#undef EXTRACT_NUMBER_AND_INCR
453#define EXTRACT_NUMBER_AND_INCR(dest, src) \
454 extract_number_and_incr (&dest, &src)
455#endif /* not EXTRACT_MACROS */
456
457#endif /* DEBUG */
458\f
459/* If DEBUG is defined, Regex prints many voluminous messages about what
460 it is doing (if the variable `debug' is nonzero). If linked with the
461 main program in `iregex.c', you can enter patterns and strings
462 interactively. And if linked with the main program in `main.c' and
463 the other test files, you can run the already-written tests. */
464
465#ifdef DEBUG
466
467/* We use standard I/O for debugging. */
468#include <stdio.h>
469
470/* It is useful to test things that ``must'' be true when debugging. */
471#include <assert.h>
472
473static int debug = 0;
474
475#define DEBUG_STATEMENT(e) e
476#define DEBUG_PRINT1(x) if (debug) printf (x)
477#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
478#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
479#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
480#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
481 if (debug) print_partial_compiled_pattern (s, e)
482#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
483 if (debug) print_double_string (w, s1, sz1, s2, sz2)
484
485
486extern void printchar ();
487
488/* Print the fastmap in human-readable form. */
489
490void
491print_fastmap (fastmap)
492 char *fastmap;
493{
494 unsigned was_a_range = 0;
495 unsigned i = 0;
496
497 while (i < (1 << BYTEWIDTH))
498 {
499 if (fastmap[i++])
500 {
501 was_a_range = 0;
502 printchar (i - 1);
503 while (i < (1 << BYTEWIDTH) && fastmap[i])
504 {
505 was_a_range = 1;
506 i++;
507 }
508 if (was_a_range)
509 {
510 printf ("-");
511 printchar (i - 1);
512 }
513 }
514 }
515 putchar ('\n');
516}
517
518
519/* Print a compiled pattern string in human-readable form, starting at
520 the START pointer into it and ending just before the pointer END. */
521
522void
523print_partial_compiled_pattern (start, end)
524 unsigned char *start;
525 unsigned char *end;
526{
527 int mcnt, mcnt2;
528 unsigned char *p = start;
529 unsigned char *pend = end;
530
531 if (start == NULL)
532 {
533 printf ("(null)\n");
534 return;
535 }
536
537 /* Loop over pattern commands. */
538 while (p < pend)
539 {
540 switch ((re_opcode_t) *p++)
541 {
542 case no_op:
543 printf ("/no_op");
544 break;
545
546 case exactn:
547 mcnt = *p++;
548 printf ("/exactn/%d", mcnt);
549 do
550 {
551 putchar ('/');
552 printchar (*p++);
553 }
554 while (--mcnt);
555 break;
556
557 case start_memory:
558 mcnt = *p++;
559 printf ("/start_memory/%d/%d", mcnt, *p++);
560 break;
561
562 case stop_memory:
563 mcnt = *p++;
564 printf ("/stop_memory/%d/%d", mcnt, *p++);
565 break;
566
567 case duplicate:
568 printf ("/duplicate/%d", *p++);
569 break;
570
571 case anychar:
572 printf ("/anychar");
573 break;
574
575 case charset:
576 case charset_not:
577 {
578 register int c;
579
580 printf ("/charset%s",
581 (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
582
583 assert (p + *p < pend);
584
585 for (c = 0; c < *p; c++)
586 {
587 unsigned bit;
588 unsigned char map_byte = p[1 + c];
589
590 putchar ('/');
591
592 for (bit = 0; bit < BYTEWIDTH; bit++)
593 if (map_byte & (1 << bit))
594 printchar (c * BYTEWIDTH + bit);
595 }
596 p += 1 + *p;
597 break;
598 }
599
600 case begline:
601 printf ("/begline");
602 break;
603
604 case endline:
605 printf ("/endline");
606 break;
607
608 case on_failure_jump:
609 extract_number_and_incr (&mcnt, &p);
610 printf ("/on_failure_jump/0/%d", mcnt);
611 break;
612
613 case on_failure_keep_string_jump:
614 extract_number_and_incr (&mcnt, &p);
615 printf ("/on_failure_keep_string_jump/0/%d", mcnt);
616 break;
617
618 case dummy_failure_jump:
619 extract_number_and_incr (&mcnt, &p);
620 printf ("/dummy_failure_jump/0/%d", mcnt);
621 break;
622
623 case push_dummy_failure:
624 printf ("/push_dummy_failure");
625 break;
626
627 case maybe_pop_jump:
628 extract_number_and_incr (&mcnt, &p);
629 printf ("/maybe_pop_jump/0/%d", mcnt);
630 break;
631
632 case pop_failure_jump:
633 extract_number_and_incr (&mcnt, &p);
634 printf ("/pop_failure_jump/0/%d", mcnt);
635 break;
636
637 case jump_past_alt:
638 extract_number_and_incr (&mcnt, &p);
639 printf ("/jump_past_alt/0/%d", mcnt);
640 break;
641
642 case jump:
643 extract_number_and_incr (&mcnt, &p);
644 printf ("/jump/0/%d", mcnt);
645 break;
646
647 case succeed_n:
648 extract_number_and_incr (&mcnt, &p);
649 extract_number_and_incr (&mcnt2, &p);
650 printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
651 break;
652
653 case jump_n:
654 extract_number_and_incr (&mcnt, &p);
655 extract_number_and_incr (&mcnt2, &p);
656 printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
657 break;
658
659 case set_number_at:
660 extract_number_and_incr (&mcnt, &p);
661 extract_number_and_incr (&mcnt2, &p);
662 printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
663 break;
664
665 case wordbound:
666 printf ("/wordbound");
667 break;
668
669 case notwordbound:
670 printf ("/notwordbound");
671 break;
672
673 case wordbeg:
674 printf ("/wordbeg");
675 break;
676
677 case wordend:
678 printf ("/wordend");
679
680#ifdef emacs
681 case before_dot:
682 printf ("/before_dot");
683 break;
684
685 case at_dot:
686 printf ("/at_dot");
687 break;
688
689 case after_dot:
690 printf ("/after_dot");
691 break;
692
693 case syntaxspec:
694 printf ("/syntaxspec");
695 mcnt = *p++;
696 printf ("/%d", mcnt);
697 break;
698
699 case notsyntaxspec:
700 printf ("/notsyntaxspec");
701 mcnt = *p++;
702 printf ("/%d", mcnt);
703 break;
704#endif /* emacs */
705
706 case wordchar:
707 printf ("/wordchar");
708 break;
709
710 case notwordchar:
711 printf ("/notwordchar");
712 break;
713
714 case begbuf:
715 printf ("/begbuf");
716 break;
717
718 case endbuf:
719 printf ("/endbuf");
720 break;
721
722 default:
723 printf ("?%d", *(p-1));
724 }
725 }
726 printf ("/\n");
727}
728
729
730void
731print_compiled_pattern (bufp)
732 struct re_pattern_buffer *bufp;
733{
734 unsigned char *buffer = bufp->buffer;
735
736 print_partial_compiled_pattern (buffer, buffer + bufp->used);
737 printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
738
739 if (bufp->fastmap_accurate && bufp->fastmap)
740 {
741 printf ("fastmap: ");
742 print_fastmap (bufp->fastmap);
743 }
744
745 printf ("re_nsub: %d\t", bufp->re_nsub);
746 printf ("regs_alloc: %d\t", bufp->regs_allocated);
747 printf ("can_be_null: %d\t", bufp->can_be_null);
748 printf ("newline_anchor: %d\n", bufp->newline_anchor);
749 printf ("no_sub: %d\t", bufp->no_sub);
750 printf ("not_bol: %d\t", bufp->not_bol);
751 printf ("not_eol: %d\t", bufp->not_eol);
752 printf ("syntax: %d\n", bufp->syntax);
753 /* Perhaps we should print the translate table? */
754}
755
756
757void
758print_double_string (where, string1, size1, string2, size2)
759 const char *where;
760 const char *string1;
761 const char *string2;
762 int size1;
763 int size2;
764{
765 unsigned this_char;
766
767 if (where == NULL)
768 printf ("(null)");
769 else
770 {
771 if (FIRST_STRING_P (where))
772 {
773 for (this_char = where - string1; this_char < size1; this_char++)
774 printchar (string1[this_char]);
775
776 where = string2;
777 }
778
779 for (this_char = where - string2; this_char < size2; this_char++)
780 printchar (string2[this_char]);
781 }
782}
783
784#else /* not DEBUG */
785
786#undef assert
787#define assert(e)
788
789#define DEBUG_STATEMENT(e)
790#define DEBUG_PRINT1(x)
791#define DEBUG_PRINT2(x1, x2)
792#define DEBUG_PRINT3(x1, x2, x3)
793#define DEBUG_PRINT4(x1, x2, x3, x4)
794#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
795#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
796
797#endif /* not DEBUG */
798\f
799/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
800 also be assigned to arbitrarily: each pattern buffer stores its own
801 syntax, so it can be changed between regex compilations. */
802reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
803
804
805/* Specify the precise syntax of regexps for compilation. This provides
806 for compatibility for various utilities which historically have
807 different, incompatible syntaxes.
808
809 The argument SYNTAX is a bit mask comprised of the various bits
810 defined in regex.h. We return the old syntax. */
811
812reg_syntax_t
813re_set_syntax (syntax)
814 reg_syntax_t syntax;
815{
816 reg_syntax_t ret = re_syntax_options;
817
818 re_syntax_options = syntax;
819 return ret;
820}
821\f
822/* This table gives an error message for each of the error codes listed
823 in regex.h. Obviously the order here has to be same as there. */
824
825static const char *re_error_msg[] =
826 { NULL, /* REG_NOERROR */
827 "No match", /* REG_NOMATCH */
828 "Invalid regular expression", /* REG_BADPAT */
829 "Invalid collation character", /* REG_ECOLLATE */
830 "Invalid character class name", /* REG_ECTYPE */
831 "Trailing backslash", /* REG_EESCAPE */
832 "Invalid back reference", /* REG_ESUBREG */
833 "Unmatched [ or [^", /* REG_EBRACK */
834 "Unmatched ( or \\(", /* REG_EPAREN */
835 "Unmatched \\{", /* REG_EBRACE */
836 "Invalid content of \\{\\}", /* REG_BADBR */
837 "Invalid range end", /* REG_ERANGE */
838 "Memory exhausted", /* REG_ESPACE */
839 "Invalid preceding regular expression", /* REG_BADRPT */
840 "Premature end of regular expression", /* REG_EEND */
841 "Regular expression too big", /* REG_ESIZE */
842 "Unmatched ) or \\)", /* REG_ERPAREN */
843 };
844\f
845/* Subroutine declarations and macros for regex_compile. */
846
847static void store_op1 (), store_op2 ();
848static void insert_op1 (), insert_op2 ();
849static boolean at_begline_loc_p (), at_endline_loc_p ();
850static boolean group_in_compile_stack ();
851static reg_errcode_t compile_range ();
852
853/* Fetch the next character in the uncompiled pattern---translating it
854 if necessary. Also cast from a signed character in the constant
855 string passed to us by the user to an unsigned char that we can use
856 as an array index (in, e.g., `translate'). */
857#define PATFETCH(c) \
858 do {if (p == pend) return REG_EEND; \
859 c = (unsigned char) *p++; \
860 if (translate) c = translate[c]; \
861 } while (0)
862
863/* Fetch the next character in the uncompiled pattern, with no
864 translation. */
865#define PATFETCH_RAW(c) \
866 do {if (p == pend) return REG_EEND; \
867 c = (unsigned char) *p++; \
868 } while (0)
869
870/* Go backwards one character in the pattern. */
871#define PATUNFETCH p--
872
873
874/* If `translate' is non-null, return translate[D], else just D. We
875 cast the subscript to translate because some data is declared as
876 `char *', to avoid warnings when a string constant is passed. But
877 when we use a character as a subscript we must make it unsigned. */
878#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
879
880
881/* Macros for outputting the compiled pattern into `buffer'. */
882
883/* If the buffer isn't allocated when it comes in, use this. */
884#define INIT_BUF_SIZE 32
885
886/* Make sure we have at least N more bytes of space in buffer. */
887#define GET_BUFFER_SPACE(n) \
888 while (b - bufp->buffer + (n) > bufp->allocated) \
889 EXTEND_BUFFER ()
890
891/* Make sure we have one more byte of buffer space and then add C to it. */
892#define BUF_PUSH(c) \
893 do { \
894 GET_BUFFER_SPACE (1); \
895 *b++ = (unsigned char) (c); \
896 } while (0)
897
898
899/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
900#define BUF_PUSH_2(c1, c2) \
901 do { \
902 GET_BUFFER_SPACE (2); \
903 *b++ = (unsigned char) (c1); \
904 *b++ = (unsigned char) (c2); \
905 } while (0)
906
907
908/* As with BUF_PUSH_2, except for three bytes. */
909#define BUF_PUSH_3(c1, c2, c3) \
910 do { \
911 GET_BUFFER_SPACE (3); \
912 *b++ = (unsigned char) (c1); \
913 *b++ = (unsigned char) (c2); \
914 *b++ = (unsigned char) (c3); \
915 } while (0)
916
917
918/* Store a jump with opcode OP at LOC to location TO. We store a
919 relative address offset by the three bytes the jump itself occupies. */
920#define STORE_JUMP(op, loc, to) \
921 store_op1 (op, loc, (to) - (loc) - 3)
922
923/* Likewise, for a two-argument jump. */
924#define STORE_JUMP2(op, loc, to, arg) \
925 store_op2 (op, loc, (to) - (loc) - 3, arg)
926
927/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
928#define INSERT_JUMP(op, loc, to) \
929 insert_op1 (op, loc, (to) - (loc) - 3, b)
930
931/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
932#define INSERT_JUMP2(op, loc, to, arg) \
933 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
934
935
936/* This is not an arbitrary limit: the arguments which represent offsets
937 into the pattern are two bytes long. So if 2^16 bytes turns out to
938 be too small, many things would have to change. */
939#define MAX_BUF_SIZE (1L << 16)
940
941
942/* Extend the buffer by twice its current size via realloc and
943 reset the pointers that pointed into the old block to point to the
944 correct places in the new one. If extending the buffer results in it
945 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
946#define EXTEND_BUFFER() \
947 do { \
948 unsigned char *old_buffer = bufp->buffer; \
949 if (bufp->allocated == MAX_BUF_SIZE) \
950 return REG_ESIZE; \
951 bufp->allocated <<= 1; \
952 if (bufp->allocated > MAX_BUF_SIZE) \
953 bufp->allocated = MAX_BUF_SIZE; \
954 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
955 if (bufp->buffer == NULL) \
956 return REG_ESPACE; \
957 /* If the buffer moved, move all the pointers into it. */ \
958 if (old_buffer != bufp->buffer) \
959 { \
960 b = (b - old_buffer) + bufp->buffer; \
961 begalt = (begalt - old_buffer) + bufp->buffer; \
962 if (fixup_alt_jump) \
963 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
964 if (laststart) \
965 laststart = (laststart - old_buffer) + bufp->buffer; \
966 if (pending_exact) \
967 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
968 } \
969 } while (0)
970
971
972/* Since we have one byte reserved for the register number argument to
973 {start,stop}_memory, the maximum number of groups we can report
974 things about is what fits in that byte. */
975#define MAX_REGNUM 255
976
977/* But patterns can have more than `MAX_REGNUM' registers. We just
978 ignore the excess. */
979typedef unsigned regnum_t;
980
981
982/* Macros for the compile stack. */
983
984/* Since offsets can go either forwards or backwards, this type needs to
985 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
986typedef int pattern_offset_t;
987
988typedef struct
989{
990 pattern_offset_t begalt_offset;
991 pattern_offset_t fixup_alt_jump;
992 pattern_offset_t inner_group_offset;
993 pattern_offset_t laststart_offset;
994 regnum_t regnum;
995} compile_stack_elt_t;
996
997
998typedef struct
999{
1000 compile_stack_elt_t *stack;
1001 unsigned size;
1002 unsigned avail; /* Offset of next open position. */
1003} compile_stack_type;
1004
1005
1006#define INIT_COMPILE_STACK_SIZE 32
1007
1008#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1009#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1010
1011/* The next available element. */
1012#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1013
1014
1015/* Set the bit for character C in a list. */
1016#define SET_LIST_BIT(c) \
1017 (b[((unsigned char) (c)) / BYTEWIDTH] \
1018 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1019
1020
1021/* Get the next unsigned number in the uncompiled pattern. */
1022#define GET_UNSIGNED_NUMBER(num) \
1023 { if (p != pend) \
1024 { \
1025 PATFETCH (c); \
1026 while (ISDIGIT (c)) \
1027 { \
1028 if (num < 0) \
1029 num = 0; \
1030 num = num * 10 + c - '0'; \
1031 if (p == pend) \
1032 break; \
1033 PATFETCH (c); \
1034 } \
1035 } \
1036 }
1037
1038#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1039
1040#define IS_CHAR_CLASS(string) \
1041 (STREQ (string, "alpha") || STREQ (string, "upper") \
1042 || STREQ (string, "lower") || STREQ (string, "digit") \
1043 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
1044 || STREQ (string, "space") || STREQ (string, "print") \
1045 || STREQ (string, "punct") || STREQ (string, "graph") \
1046 || STREQ (string, "cntrl") || STREQ (string, "blank"))
1047\f
1048/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1049 Returns one of error codes defined in `regex.h', or zero for success.
1050
1051 Assumes the `allocated' (and perhaps `buffer') and `translate'
1052 fields are set in BUFP on entry.
1053
1054 If it succeeds, results are put in BUFP (if it returns an error, the
1055 contents of BUFP are undefined):
1056 `buffer' is the compiled pattern;
1057 `syntax' is set to SYNTAX;
1058 `used' is set to the length of the compiled pattern;
1059 `fastmap_accurate' is zero;
1060 `re_nsub' is the number of subexpressions in PATTERN;
1061 `not_bol' and `not_eol' are zero;
1062
1063 The `fastmap' and `newline_anchor' fields are neither
1064 examined nor set. */
1065
1066static reg_errcode_t
1067regex_compile (pattern, size, syntax, bufp)
1068 const char *pattern;
1069 int size;
1070 reg_syntax_t syntax;
1071 struct re_pattern_buffer *bufp;
1072{
1073 /* We fetch characters from PATTERN here. Even though PATTERN is
1074 `char *' (i.e., signed), we declare these variables as unsigned, so
1075 they can be reliably used as array indices. */
1076 register unsigned char c, c1;
1077
1078 /* A random tempory spot in PATTERN. */
1079 const char *p1;
1080
1081 /* Points to the end of the buffer, where we should append. */
1082 register unsigned char *b;
1083
1084 /* Keeps track of unclosed groups. */
1085 compile_stack_type compile_stack;
1086
1087 /* Points to the current (ending) position in the pattern. */
1088 const char *p = pattern;
1089 const char *pend = pattern + size;
1090
1091 /* How to translate the characters in the pattern. */
1092 char *translate = bufp->translate;
1093
1094 /* Address of the count-byte of the most recently inserted `exactn'
1095 command. This makes it possible to tell if a new exact-match
1096 character can be added to that command or if the character requires
1097 a new `exactn' command. */
1098 unsigned char *pending_exact = 0;
1099
1100 /* Address of start of the most recently finished expression.
1101 This tells, e.g., postfix * where to find the start of its
1102 operand. Reset at the beginning of groups and alternatives. */
1103 unsigned char *laststart = 0;
1104
1105 /* Address of beginning of regexp, or inside of last group. */
1106 unsigned char *begalt;
1107
1108 /* Place in the uncompiled pattern (i.e., the {) to
1109 which to go back if the interval is invalid. */
1110 const char *beg_interval;
1111
1112 /* Address of the place where a forward jump should go to the end of
1113 the containing expression. Each alternative of an `or' -- except the
1114 last -- ends with a forward jump of this sort. */
1115 unsigned char *fixup_alt_jump = 0;
1116
1117 /* Counts open-groups as they are encountered. Remembered for the
1118 matching close-group on the compile stack, so the same register
1119 number is put in the stop_memory as the start_memory. */
1120 regnum_t regnum = 0;
1121
1122#ifdef DEBUG
1123 DEBUG_PRINT1 ("\nCompiling pattern: ");
1124 if (debug)
1125 {
1126 unsigned debug_count;
1127
1128 for (debug_count = 0; debug_count < size; debug_count++)
1129 printchar (pattern[debug_count]);
1130 putchar ('\n');
1131 }
1132#endif /* DEBUG */
1133
1134 /* Initialize the compile stack. */
1135 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1136 if (compile_stack.stack == NULL)
1137 return REG_ESPACE;
1138
1139 compile_stack.size = INIT_COMPILE_STACK_SIZE;
1140 compile_stack.avail = 0;
1141
1142 /* Initialize the pattern buffer. */
1143 bufp->syntax = syntax;
1144 bufp->fastmap_accurate = 0;
1145 bufp->not_bol = bufp->not_eol = 0;
1146
1147 /* Set `used' to zero, so that if we return an error, the pattern
1148 printer (for debugging) will think there's no pattern. We reset it
1149 at the end. */
1150 bufp->used = 0;
1151
1152 /* Always count groups, whether or not bufp->no_sub is set. */
1153 bufp->re_nsub = 0;
1154
1155#if !defined (emacs) && !defined (SYNTAX_TABLE)
1156 /* Initialize the syntax table. */
1157 init_syntax_once ();
1158#endif
1159
1160 if (bufp->allocated == 0)
1161 {
1162 if (bufp->buffer)
1163 { /* If zero allocated, but buffer is non-null, try to realloc
1164 enough space. This loses if buffer's address is bogus, but
1165 that is the user's responsibility. */
1166 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1167 }
1168 else
1169 { /* Caller did not allocate a buffer. Do it for them. */
1170 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1171 }
1172 if (!bufp->buffer) return REG_ESPACE;
1173
1174 bufp->allocated = INIT_BUF_SIZE;
1175 }
1176
1177 begalt = b = bufp->buffer;
1178
1179 /* Loop through the uncompiled pattern until we're at the end. */
1180 while (p != pend)
1181 {
1182 PATFETCH (c);
1183
1184 switch (c)
1185 {
1186 case '^':
1187 {
1188 if ( /* If at start of pattern, it's an operator. */
1189 p == pattern + 1
1190 /* If context independent, it's an operator. */
1191 || syntax & RE_CONTEXT_INDEP_ANCHORS
1192 /* Otherwise, depends on what's come before. */
1193 || at_begline_loc_p (pattern, p, syntax))
1194 BUF_PUSH (begline);
1195 else
1196 goto normal_char;
1197 }
1198 break;
1199
1200
1201 case '$':
1202 {
1203 if ( /* If at end of pattern, it's an operator. */
1204 p == pend
1205 /* If context independent, it's an operator. */
1206 || syntax & RE_CONTEXT_INDEP_ANCHORS
1207 /* Otherwise, depends on what's next. */
1208 || at_endline_loc_p (p, pend, syntax))
1209 BUF_PUSH (endline);
1210 else
1211 goto normal_char;
1212 }
1213 break;
1214
1215
1216 case '+':
1217 case '?':
1218 if ((syntax & RE_BK_PLUS_QM)
1219 || (syntax & RE_LIMITED_OPS))
1220 goto normal_char;
1221 handle_plus:
1222 case '*':
1223 /* If there is no previous pattern... */
1224 if (!laststart)
1225 {
1226 if (syntax & RE_CONTEXT_INVALID_OPS)
1227 return REG_BADRPT;
1228 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1229 goto normal_char;
1230 }
1231
1232 {
1233 /* Are we optimizing this jump? */
1234 boolean keep_string_p = false;
1235
1236 /* 1 means zero (many) matches is allowed. */
1237 char zero_times_ok = 0, many_times_ok = 0;
1238
1239 /* If there is a sequence of repetition chars, collapse it
1240 down to just one (the right one). We can't combine
1241 interval operators with these because of, e.g., `a{2}*',
1242 which should only match an even number of `a's. */
1243
1244 for (;;)
1245 {
1246 zero_times_ok |= c != '+';
1247 many_times_ok |= c != '?';
1248
1249 if (p == pend)
1250 break;
1251
1252 PATFETCH (c);
1253
1254 if (c == '*'
1255 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1256 ;
1257
1258 else if (syntax & RE_BK_PLUS_QM && c == '\\')
1259 {
1260 if (p == pend) return REG_EESCAPE;
1261
1262 PATFETCH (c1);
1263 if (!(c1 == '+' || c1 == '?'))
1264 {
1265 PATUNFETCH;
1266 PATUNFETCH;
1267 break;
1268 }
1269
1270 c = c1;
1271 }
1272 else
1273 {
1274 PATUNFETCH;
1275 break;
1276 }
1277
1278 /* If we get here, we found another repeat character. */
1279 }
1280
1281 /* Star, etc. applied to an empty pattern is equivalent
1282 to an empty pattern. */
1283 if (!laststart)
1284 break;
1285
1286 /* Now we know whether or not zero matches is allowed
1287 and also whether or not two or more matches is allowed. */
1288 if (many_times_ok)
1289 { /* More than one repetition is allowed, so put in at the
1290 end a backward relative jump from `b' to before the next
1291 jump we're going to put in below (which jumps from
1292 laststart to after this jump).
1293
1294 But if we are at the `*' in the exact sequence `.*\n',
1295 insert an unconditional jump backwards to the .,
1296 instead of the beginning of the loop. This way we only
1297 push a failure point once, instead of every time
1298 through the loop. */
1299 assert (p - 1 > pattern);
1300
1301 /* Allocate the space for the jump. */
1302 GET_BUFFER_SPACE (3);
1303
1304 /* We know we are not at the first character of the pattern,
1305 because laststart was nonzero. And we've already
1306 incremented `p', by the way, to be the character after
1307 the `*'. Do we have to do something analogous here
1308 for null bytes, because of RE_DOT_NOT_NULL? */
1309 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1310 && zero_times_ok
1311 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1312 && !(syntax & RE_DOT_NEWLINE))
1313 { /* We have .*\n. */
1314 STORE_JUMP (jump, b, laststart);
1315 keep_string_p = true;
1316 }
1317 else
1318 /* Anything else. */
1319 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1320
1321 /* We've added more stuff to the buffer. */
1322 b += 3;
1323 }
1324
1325 /* On failure, jump from laststart to b + 3, which will be the
1326 end of the buffer after this jump is inserted. */
1327 GET_BUFFER_SPACE (3);
1328 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1329 : on_failure_jump,
1330 laststart, b + 3);
1331 pending_exact = 0;
1332 b += 3;
1333
1334 if (!zero_times_ok)
1335 {
1336 /* At least one repetition is required, so insert a
1337 `dummy_failure_jump' before the initial
1338 `on_failure_jump' instruction of the loop. This
1339 effects a skip over that instruction the first time
1340 we hit that loop. */
1341 GET_BUFFER_SPACE (3);
1342 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1343 b += 3;
1344 }
1345 }
1346 break;
1347
1348
1349 case '.':
1350 laststart = b;
1351 BUF_PUSH (anychar);
1352 break;
1353
1354
1355 case '[':
1356 {
1357 boolean had_char_class = false;
1358
1359 if (p == pend) return REG_EBRACK;
1360
1361 /* Ensure that we have enough space to push a charset: the
1362 opcode, the length count, and the bitset; 34 bytes in all. */
1363 GET_BUFFER_SPACE (34);
1364
1365 laststart = b;
1366
1367 /* We test `*p == '^' twice, instead of using an if
1368 statement, so we only need one BUF_PUSH. */
1369 BUF_PUSH (*p == '^' ? charset_not : charset);
1370 if (*p == '^')
1371 p++;
1372
1373 /* Remember the first position in the bracket expression. */
1374 p1 = p;
1375
1376 /* Push the number of bytes in the bitmap. */
1377 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1378
1379 /* Clear the whole map. */
1380 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1381
1382 /* charset_not matches newline according to a syntax bit. */
1383 if ((re_opcode_t) b[-2] == charset_not
1384 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1385 SET_LIST_BIT ('\n');
1386
1387 /* Read in characters and ranges, setting map bits. */
1388 for (;;)
1389 {
1390 if (p == pend) return REG_EBRACK;
1391
1392 PATFETCH (c);
1393
1394 /* \ might escape characters inside [...] and [^...]. */
1395 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1396 {
1397 if (p == pend) return REG_EESCAPE;
1398
1399 PATFETCH (c1);
1400 SET_LIST_BIT (c1);
1401 continue;
1402 }
1403
1404 /* Could be the end of the bracket expression. If it's
1405 not (i.e., when the bracket expression is `[]' so
1406 far), the ']' character bit gets set way below. */
1407 if (c == ']' && p != p1 + 1)
1408 break;
1409
1410 /* Look ahead to see if it's a range when the last thing
1411 was a character class. */
1412 if (had_char_class && c == '-' && *p != ']')
1413 return REG_ERANGE;
1414
1415 /* Look ahead to see if it's a range when the last thing
1416 was a character: if this is a hyphen not at the
1417 beginning or the end of a list, then it's the range
1418 operator. */
1419 if (c == '-'
1420 && !(p - 2 >= pattern && p[-2] == '[')
1421 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1422 && *p != ']')
1423 {
1424 reg_errcode_t ret
1425 = compile_range (&p, pend, translate, syntax, b);
1426 if (ret != REG_NOERROR) return ret;
1427 }
1428
1429 else if (p[0] == '-' && p[1] != ']')
1430 { /* This handles ranges made up of characters only. */
1431 reg_errcode_t ret;
1432
1433 /* Move past the `-'. */
1434 PATFETCH (c1);
1435
1436 ret = compile_range (&p, pend, translate, syntax, b);
1437 if (ret != REG_NOERROR) return ret;
1438 }
1439
1440 /* See if we're at the beginning of a possible character
1441 class. */
1442
1443 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1444 { /* Leave room for the null. */
1445 char str[CHAR_CLASS_MAX_LENGTH + 1];
1446
1447 PATFETCH (c);
1448 c1 = 0;
1449
1450 /* If pattern is `[[:'. */
1451 if (p == pend) return REG_EBRACK;
1452
1453 for (;;)
1454 {
1455 PATFETCH (c);
1456 if (c == ':' || c == ']' || p == pend
1457 || c1 == CHAR_CLASS_MAX_LENGTH)
1458 break;
1459 str[c1++] = c;
1460 }
1461 str[c1] = '\0';
1462
1463 /* If isn't a word bracketed by `[:' and:`]':
1464 undo the ending character, the letters, and leave
1465 the leading `:' and `[' (but set bits for them). */
1466 if (c == ':' && *p == ']')
1467 {
1468 int ch;
1469 boolean is_alnum = STREQ (str, "alnum");
1470 boolean is_alpha = STREQ (str, "alpha");
1471 boolean is_blank = STREQ (str, "blank");
1472 boolean is_cntrl = STREQ (str, "cntrl");
1473 boolean is_digit = STREQ (str, "digit");
1474 boolean is_graph = STREQ (str, "graph");
1475 boolean is_lower = STREQ (str, "lower");
1476 boolean is_print = STREQ (str, "print");
1477 boolean is_punct = STREQ (str, "punct");
1478 boolean is_space = STREQ (str, "space");
1479 boolean is_upper = STREQ (str, "upper");
1480 boolean is_xdigit = STREQ (str, "xdigit");
1481
1482 if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1483
1484 /* Throw away the ] at the end of the character
1485 class. */
1486 PATFETCH (c);
1487
1488 if (p == pend) return REG_EBRACK;
1489
1490 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1491 {
1492 if ( (is_alnum && ISALNUM (ch))
1493 || (is_alpha && ISALPHA (ch))
1494 || (is_blank && ISBLANK (ch))
1495 || (is_cntrl && ISCNTRL (ch))
1496 || (is_digit && ISDIGIT (ch))
1497 || (is_graph && ISGRAPH (ch))
1498 || (is_lower && ISLOWER (ch))
1499 || (is_print && ISPRINT (ch))
1500 || (is_punct && ISPUNCT (ch))
1501 || (is_space && ISSPACE (ch))
1502 || (is_upper && ISUPPER (ch))
1503 || (is_xdigit && ISXDIGIT (ch)))
1504 SET_LIST_BIT (ch);
1505 }
1506 had_char_class = true;
1507 }
1508 else
1509 {
1510 c1++;
1511 while (c1--)
1512 PATUNFETCH;
1513 SET_LIST_BIT ('[');
1514 SET_LIST_BIT (':');
1515 had_char_class = false;
1516 }
1517 }
1518 else
1519 {
1520 had_char_class = false;
1521 SET_LIST_BIT (c);
1522 }
1523 }
1524
1525 /* Discard any (non)matching list bytes that are all 0 at the
1526 end of the map. Decrease the map-length byte too. */
1527 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1528 b[-1]--;
1529 b += b[-1];
1530 }
1531 break;
1532
1533
1534 case '(':
1535 if (syntax & RE_NO_BK_PARENS)
1536 goto handle_open;
1537 else
1538 goto normal_char;
1539
1540
1541 case ')':
1542 if (syntax & RE_NO_BK_PARENS)
1543 goto handle_close;
1544 else
1545 goto normal_char;
1546
1547
1548 case '\n':
1549 if (syntax & RE_NEWLINE_ALT)
1550 goto handle_alt;
1551 else
1552 goto normal_char;
1553
1554
1555 case '|':
1556 if (syntax & RE_NO_BK_VBAR)
1557 goto handle_alt;
1558 else
1559 goto normal_char;
1560
1561
1562 case '{':
1563 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1564 goto handle_interval;
1565 else
1566 goto normal_char;
1567
1568
1569 case '\\':
1570 if (p == pend) return REG_EESCAPE;
1571
1572 /* Do not translate the character after the \, so that we can
1573 distinguish, e.g., \B from \b, even if we normally would
1574 translate, e.g., B to b. */
1575 PATFETCH_RAW (c);
1576
1577 switch (c)
1578 {
1579 case '(':
1580 if (syntax & RE_NO_BK_PARENS)
1581 goto normal_backslash;
1582
1583 handle_open:
1584 bufp->re_nsub++;
1585 regnum++;
1586
1587 if (COMPILE_STACK_FULL)
1588 {
1589 RETALLOC (compile_stack.stack, compile_stack.size << 1,
1590 compile_stack_elt_t);
1591 if (compile_stack.stack == NULL) return REG_ESPACE;
1592
1593 compile_stack.size <<= 1;
1594 }
1595
1596 /* These are the values to restore when we hit end of this
1597 group. They are all relative offsets, so that if the
1598 whole pattern moves because of realloc, they will still
1599 be valid. */
1600 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1601 COMPILE_STACK_TOP.fixup_alt_jump
1602 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1603 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1604 COMPILE_STACK_TOP.regnum = regnum;
1605
1606 /* We will eventually replace the 0 with the number of
1607 groups inner to this one. But do not push a
1608 start_memory for groups beyond the last one we can
1609 represent in the compiled pattern. */
1610 if (regnum <= MAX_REGNUM)
1611 {
1612 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1613 BUF_PUSH_3 (start_memory, regnum, 0);
1614 }
1615
1616 compile_stack.avail++;
1617
1618 fixup_alt_jump = 0;
1619 laststart = 0;
1620 begalt = b;
1621 /* If we've reached MAX_REGNUM groups, then this open
1622 won't actually generate any code, so we'll have to
1623 clear pending_exact explicitly. */
1624 pending_exact = 0;
1625 break;
1626
1627
1628 case ')':
1629 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1630
1631 if (COMPILE_STACK_EMPTY)
1632 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1633 goto normal_backslash;
1634 else
1635 return REG_ERPAREN;
1636
1637 handle_close:
1638 if (fixup_alt_jump)
1639 { /* Push a dummy failure point at the end of the
1640 alternative for a possible future
1641 `pop_failure_jump' to pop. See comments at
1642 `push_dummy_failure' in `re_match_2'. */
1643 BUF_PUSH (push_dummy_failure);
1644
1645 /* We allocated space for this jump when we assigned
1646 to `fixup_alt_jump', in the `handle_alt' case below. */
1647 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1648 }
1649
1650 /* See similar code for backslashed left paren above. */
1651 if (COMPILE_STACK_EMPTY)
1652 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1653 goto normal_char;
1654 else
1655 return REG_ERPAREN;
1656
1657 /* Since we just checked for an empty stack above, this
1658 ``can't happen''. */
1659 assert (compile_stack.avail != 0);
1660 {
1661 /* We don't just want to restore into `regnum', because
1662 later groups should continue to be numbered higher,
1663 as in `(ab)c(de)' -- the second group is #2. */
1664 regnum_t this_group_regnum;
1665
1666 compile_stack.avail--;
1667 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1668 fixup_alt_jump
1669 = COMPILE_STACK_TOP.fixup_alt_jump
1670 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1671 : 0;
1672 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1673 this_group_regnum = COMPILE_STACK_TOP.regnum;
1674 /* If we've reached MAX_REGNUM groups, then this open
1675 won't actually generate any code, so we'll have to
1676 clear pending_exact explicitly. */
1677 pending_exact = 0;
1678
1679 /* We're at the end of the group, so now we know how many
1680 groups were inside this one. */
1681 if (this_group_regnum <= MAX_REGNUM)
1682 {
1683 unsigned char *inner_group_loc
1684 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1685
1686 *inner_group_loc = regnum - this_group_regnum;
1687 BUF_PUSH_3 (stop_memory, this_group_regnum,
1688 regnum - this_group_regnum);
1689 }
1690 }
1691 break;
1692
1693
1694 case '|': /* `\|'. */
1695 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1696 goto normal_backslash;
1697 handle_alt:
1698 if (syntax & RE_LIMITED_OPS)
1699 goto normal_char;
1700
1701 /* Insert before the previous alternative a jump which
1702 jumps to this alternative if the former fails. */
1703 GET_BUFFER_SPACE (3);
1704 INSERT_JUMP (on_failure_jump, begalt, b + 6);
1705 pending_exact = 0;
1706 b += 3;
1707
1708 /* The alternative before this one has a jump after it
1709 which gets executed if it gets matched. Adjust that
1710 jump so it will jump to this alternative's analogous
1711 jump (put in below, which in turn will jump to the next
1712 (if any) alternative's such jump, etc.). The last such
1713 jump jumps to the correct final destination. A picture:
1714 _____ _____
1715 | | | |
1716 | v | v
1717 a | b | c
1718
1719 If we are at `b', then fixup_alt_jump right now points to a
1720 three-byte space after `a'. We'll put in the jump, set
1721 fixup_alt_jump to right after `b', and leave behind three
1722 bytes which we'll fill in when we get to after `c'. */
1723
1724 if (fixup_alt_jump)
1725 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1726
1727 /* Mark and leave space for a jump after this alternative,
1728 to be filled in later either by next alternative or
1729 when know we're at the end of a series of alternatives. */
1730 fixup_alt_jump = b;
1731 GET_BUFFER_SPACE (3);
1732 b += 3;
1733
1734 laststart = 0;
1735 begalt = b;
1736 break;
1737
1738
1739 case '{':
1740 /* If \{ is a literal. */
1741 if (!(syntax & RE_INTERVALS)
1742 /* If we're at `\{' and it's not the open-interval
1743 operator. */
1744 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1745 || (p - 2 == pattern && p == pend))
1746 goto normal_backslash;
1747
1748 handle_interval:
1749 {
1750 /* If got here, then the syntax allows intervals. */
1751
1752 /* At least (most) this many matches must be made. */
1753 int lower_bound = -1, upper_bound = -1;
1754
1755 beg_interval = p - 1;
1756
1757 if (p == pend)
1758 {
1759 if (syntax & RE_NO_BK_BRACES)
1760 goto unfetch_interval;
1761 else
1762 return REG_EBRACE;
1763 }
1764
1765 GET_UNSIGNED_NUMBER (lower_bound);
1766
1767 if (c == ',')
1768 {
1769 GET_UNSIGNED_NUMBER (upper_bound);
1770 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1771 }
1772 else
1773 /* Interval such as `{1}' => match exactly once. */
1774 upper_bound = lower_bound;
1775
1776 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1777 || lower_bound > upper_bound)
1778 {
1779 if (syntax & RE_NO_BK_BRACES)
1780 goto unfetch_interval;
1781 else
1782 return REG_BADBR;
1783 }
1784
1785 if (!(syntax & RE_NO_BK_BRACES))
1786 {
1787 if (c != '\\') return REG_EBRACE;
1788
1789 PATFETCH (c);
1790 }
1791
1792 if (c != '}')
1793 {
1794 if (syntax & RE_NO_BK_BRACES)
1795 goto unfetch_interval;
1796 else
1797 return REG_BADBR;
1798 }
1799
1800 /* We just parsed a valid interval. */
1801
1802 /* If it's invalid to have no preceding re. */
1803 if (!laststart)
1804 {
1805 if (syntax & RE_CONTEXT_INVALID_OPS)
1806 return REG_BADRPT;
1807 else if (syntax & RE_CONTEXT_INDEP_OPS)
1808 laststart = b;
1809 else
1810 goto unfetch_interval;
1811 }
1812
1813 /* If the upper bound is zero, don't want to succeed at
1814 all; jump from `laststart' to `b + 3', which will be
1815 the end of the buffer after we insert the jump. */
1816 if (upper_bound == 0)
1817 {
1818 GET_BUFFER_SPACE (3);
1819 INSERT_JUMP (jump, laststart, b + 3);
1820 b += 3;
1821 }
1822
1823 /* Otherwise, we have a nontrivial interval. When
1824 we're all done, the pattern will look like:
1825 set_number_at <jump count> <upper bound>
1826 set_number_at <succeed_n count> <lower bound>
1827 succeed_n <after jump addr> <succed_n count>
1828 <body of loop>
1829 jump_n <succeed_n addr> <jump count>
1830 (The upper bound and `jump_n' are omitted if
1831 `upper_bound' is 1, though.) */
1832 else
1833 { /* If the upper bound is > 1, we need to insert
1834 more at the end of the loop. */
1835 unsigned nbytes = 10 + (upper_bound > 1) * 10;
1836
1837 GET_BUFFER_SPACE (nbytes);
1838
1839 /* Initialize lower bound of the `succeed_n', even
1840 though it will be set during matching by its
1841 attendant `set_number_at' (inserted next),
1842 because `re_compile_fastmap' needs to know.
1843 Jump to the `jump_n' we might insert below. */
1844 INSERT_JUMP2 (succeed_n, laststart,
1845 b + 5 + (upper_bound > 1) * 5,
1846 lower_bound);
1847 b += 5;
1848
1849 /* Code to initialize the lower bound. Insert
1850 before the `succeed_n'. The `5' is the last two
1851 bytes of this `set_number_at', plus 3 bytes of
1852 the following `succeed_n'. */
1853 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1854 b += 5;
1855
1856 if (upper_bound > 1)
1857 { /* More than one repetition is allowed, so
1858 append a backward jump to the `succeed_n'
1859 that starts this interval.
1860
1861 When we've reached this during matching,
1862 we'll have matched the interval once, so
1863 jump back only `upper_bound - 1' times. */
1864 STORE_JUMP2 (jump_n, b, laststart + 5,
1865 upper_bound - 1);
1866 b += 5;
1867
1868 /* The location we want to set is the second
1869 parameter of the `jump_n'; that is `b-2' as
1870 an absolute address. `laststart' will be
1871 the `set_number_at' we're about to insert;
1872 `laststart+3' the number to set, the source
1873 for the relative address. But we are
1874 inserting into the middle of the pattern --
1875 so everything is getting moved up by 5.
1876 Conclusion: (b - 2) - (laststart + 3) + 5,
1877 i.e., b - laststart.
1878
1879 We insert this at the beginning of the loop
1880 so that if we fail during matching, we'll
1881 reinitialize the bounds. */
1882 insert_op2 (set_number_at, laststart, b - laststart,
1883 upper_bound - 1, b);
1884 b += 5;
1885 }
1886 }
1887 pending_exact = 0;
1888 beg_interval = NULL;
1889 }
1890 break;
1891
1892 unfetch_interval:
1893 /* If an invalid interval, match the characters as literals. */
1894 assert (beg_interval);
1895 p = beg_interval;
1896 beg_interval = NULL;
1897
1898 /* normal_char and normal_backslash need `c'. */
1899 PATFETCH (c);
1900
1901 if (!(syntax & RE_NO_BK_BRACES))
1902 {
1903 if (p > pattern && p[-1] == '\\')
1904 goto normal_backslash;
1905 }
1906 goto normal_char;
1907
1908#ifdef emacs
1909 /* There is no way to specify the before_dot and after_dot
1910 operators. rms says this is ok. --karl */
1911 case '=':
1912 BUF_PUSH (at_dot);
1913 break;
1914
1915 case 's':
1916 laststart = b;
1917 PATFETCH (c);
1918 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1919 break;
1920
1921 case 'S':
1922 laststart = b;
1923 PATFETCH (c);
1924 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1925 break;
1926#endif /* emacs */
1927
1928
1929 case 'w':
1930 laststart = b;
1931 BUF_PUSH (wordchar);
1932 break;
1933
1934
1935 case 'W':
1936 laststart = b;
1937 BUF_PUSH (notwordchar);
1938 break;
1939
1940
1941 case '<':
1942 BUF_PUSH (wordbeg);
1943 break;
1944
1945 case '>':
1946 BUF_PUSH (wordend);
1947 break;
1948
1949 case 'b':
1950 BUF_PUSH (wordbound);
1951 break;
1952
1953 case 'B':
1954 BUF_PUSH (notwordbound);
1955 break;
1956
1957 case '`':
1958 BUF_PUSH (begbuf);
1959 break;
1960
1961 case '\'':
1962 BUF_PUSH (endbuf);
1963 break;
1964
1965 case '1': case '2': case '3': case '4': case '5':
1966 case '6': case '7': case '8': case '9':
1967 if (syntax & RE_NO_BK_REFS)
1968 goto normal_char;
1969
1970 c1 = c - '0';
1971
1972 if (c1 > regnum)
1973 return REG_ESUBREG;
1974
1975 /* Can't back reference to a subexpression if inside of it. */
1976 if (group_in_compile_stack (compile_stack, c1))
1977 goto normal_char;
1978
1979 laststart = b;
1980 BUF_PUSH_2 (duplicate, c1);
1981 break;
1982
1983
1984 case '+':
1985 case '?':
1986 if (syntax & RE_BK_PLUS_QM)
1987 goto handle_plus;
1988 else
1989 goto normal_backslash;
1990
1991 default:
1992 normal_backslash:
1993 /* You might think it would be useful for \ to mean
1994 not to translate; but if we don't translate it
1995 it will never match anything. */
1996 c = TRANSLATE (c);
1997 goto normal_char;
1998 }
1999 break;
2000
2001
2002 default:
2003 /* Expects the character in `c'. */
2004 normal_char:
2005 /* If no exactn currently being built. */
2006 if (!pending_exact
2007
2008 /* If last exactn not at current position. */
2009 || pending_exact + *pending_exact + 1 != b
2010
2011 /* We have only one byte following the exactn for the count. */
2012 || *pending_exact == (1 << BYTEWIDTH) - 1
2013
2014 /* If followed by a repetition operator. */
2015 || *p == '*' || *p == '^'
2016 || ((syntax & RE_BK_PLUS_QM)
2017 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2018 : (*p == '+' || *p == '?'))
2019 || ((syntax & RE_INTERVALS)
2020 && ((syntax & RE_NO_BK_BRACES)
2021 ? *p == '{'
2022 : (p[0] == '\\' && p[1] == '{'))))
2023 {
2024 /* Start building a new exactn. */
2025
2026 laststart = b;
2027
2028 BUF_PUSH_2 (exactn, 0);
2029 pending_exact = b - 1;
2030 }
2031
2032 BUF_PUSH (c);
2033 (*pending_exact)++;
2034 break;
2035 } /* switch (c) */
2036 } /* while p != pend */
2037
2038
2039 /* Through the pattern now. */
2040
2041 if (fixup_alt_jump)
2042 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2043
2044 if (!COMPILE_STACK_EMPTY)
2045 return REG_EPAREN;
2046
2047 free (compile_stack.stack);
2048
2049 /* We have succeeded; set the length of the buffer. */
2050 bufp->used = b - bufp->buffer;
2051
2052#ifdef DEBUG
2053 if (debug)
2054 {
2055 DEBUG_PRINT1 ("\nCompiled pattern: ");
2056 print_compiled_pattern (bufp);
2057 }
2058#endif /* DEBUG */
2059
2060 return REG_NOERROR;
2061} /* regex_compile */
2062\f
2063/* Subroutines for `regex_compile'. */
2064
2065/* Store OP at LOC followed by two-byte integer parameter ARG. */
2066
2067static void
2068store_op1 (op, loc, arg)
2069 re_opcode_t op;
2070 unsigned char *loc;
2071 int arg;
2072{
2073 *loc = (unsigned char) op;
2074 STORE_NUMBER (loc + 1, arg);
2075}
2076
2077
2078/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
2079
2080static void
2081store_op2 (op, loc, arg1, arg2)
2082 re_opcode_t op;
2083 unsigned char *loc;
2084 int arg1, arg2;
2085{
2086 *loc = (unsigned char) op;
2087 STORE_NUMBER (loc + 1, arg1);
2088 STORE_NUMBER (loc + 3, arg2);
2089}
2090
2091
2092/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2093 for OP followed by two-byte integer parameter ARG. */
2094
2095static void
2096insert_op1 (op, loc, arg, end)
2097 re_opcode_t op;
2098 unsigned char *loc;
2099 int arg;
2100 unsigned char *end;
2101{
2102 register unsigned char *pfrom = end;
2103 register unsigned char *pto = end + 3;
2104
2105 while (pfrom != loc)
2106 *--pto = *--pfrom;
2107
2108 store_op1 (op, loc, arg);
2109}
2110
2111
2112/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
2113
2114static void
2115insert_op2 (op, loc, arg1, arg2, end)
2116 re_opcode_t op;
2117 unsigned char *loc;
2118 int arg1, arg2;
2119 unsigned char *end;
2120{
2121 register unsigned char *pfrom = end;
2122 register unsigned char *pto = end + 5;
2123
2124 while (pfrom != loc)
2125 *--pto = *--pfrom;
2126
2127 store_op2 (op, loc, arg1, arg2);
2128}
2129
2130
2131/* P points to just after a ^ in PATTERN. Return true if that ^ comes
2132 after an alternative or a begin-subexpression. We assume there is at
2133 least one character before the ^. */
2134
2135static boolean
2136at_begline_loc_p (pattern, p, syntax)
2137 const char *pattern, *p;
2138 reg_syntax_t syntax;
2139{
2140 const char *prev = p - 2;
2141 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2142
2143 return
2144 /* After a subexpression? */
2145 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2146 /* After an alternative? */
2147 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2148}
2149
2150
2151/* The dual of at_begline_loc_p. This one is for $. We assume there is
2152 at least one character after the $, i.e., `P < PEND'. */
2153
2154static boolean
2155at_endline_loc_p (p, pend, syntax)
2156 const char *p, *pend;
2157 int syntax;
2158{
2159 const char *next = p;
2160 boolean next_backslash = *next == '\\';
2161 const char *next_next = p + 1 < pend ? p + 1 : NULL;
2162
2163 return
2164 /* Before a subexpression? */
2165 (syntax & RE_NO_BK_PARENS ? *next == ')'
2166 : next_backslash && next_next && *next_next == ')')
2167 /* Before an alternative? */
2168 || (syntax & RE_NO_BK_VBAR ? *next == '|'
2169 : next_backslash && next_next && *next_next == '|');
2170}
2171
2172
2173/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2174 false if it's not. */
2175
2176static boolean
2177group_in_compile_stack (compile_stack, regnum)
2178 compile_stack_type compile_stack;
2179 regnum_t regnum;
2180{
2181 int this_element;
2182
2183 for (this_element = compile_stack.avail - 1;
2184 this_element >= 0;
2185 this_element--)
2186 if (compile_stack.stack[this_element].regnum == regnum)
2187 return true;
2188
2189 return false;
2190}
2191
2192
2193/* Read the ending character of a range (in a bracket expression) from the
2194 uncompiled pattern *P_PTR (which ends at PEND). We assume the
2195 starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
2196 Then we set the translation of all bits between the starting and
2197 ending characters (inclusive) in the compiled pattern B.
2198
2199 Return an error code.
2200
2201 We use these short variable names so we can use the same macros as
2202 `regex_compile' itself. */
2203
2204static reg_errcode_t
2205compile_range (p_ptr, pend, translate, syntax, b)
2206 const char **p_ptr, *pend;
2207 char *translate;
2208 reg_syntax_t syntax;
2209 unsigned char *b;
2210{
2211 unsigned this_char;
2212
2213 const char *p = *p_ptr;
2214 int range_start, range_end;
2215
2216 if (p == pend)
2217 return REG_ERANGE;
2218
2219 /* Even though the pattern is a signed `char *', we need to fetch
2220 with unsigned char *'s; if the high bit of the pattern character
2221 is set, the range endpoints will be negative if we fetch using a
2222 signed char *.
2223
2224 We also want to fetch the endpoints without translating them; the
2225 appropriate translation is done in the bit-setting loop below. */
2226 range_start = ((unsigned char *) p)[-2];
2227 range_end = ((unsigned char *) p)[0];
2228
2229 /* Have to increment the pointer into the pattern string, so the
2230 caller isn't still at the ending character. */
2231 (*p_ptr)++;
2232
2233 /* If the start is after the end, the range is empty. */
2234 if (range_start > range_end)
2235 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2236
2237 /* Here we see why `this_char' has to be larger than an `unsigned
2238 char' -- the range is inclusive, so if `range_end' == 0xff
2239 (assuming 8-bit characters), we would otherwise go into an infinite
2240 loop, since all characters <= 0xff. */
2241 for (this_char = range_start; this_char <= range_end; this_char++)
2242 {
2243 SET_LIST_BIT (TRANSLATE (this_char));
2244 }
2245
2246 return REG_NOERROR;
2247}
2248\f
2249/* Failure stack declarations and macros; both re_compile_fastmap and
2250 re_match_2 use a failure stack. These have to be macros because of
2251 REGEX_ALLOCATE. */
2252
2253
2254/* Number of failure points for which to initially allocate space
2255 when matching. If this number is exceeded, we allocate more
2256 space, so it is not a hard limit. */
2257#ifndef INIT_FAILURE_ALLOC
2258#define INIT_FAILURE_ALLOC 5
2259#endif
2260
2261/* Roughly the maximum number of failure points on the stack. Would be
2262 exactly that if always used MAX_FAILURE_SPACE each time we failed.
2263 This is a variable only so users of regex can assign to it; we never
2264 change it ourselves. */
2265int re_max_failures = 2000;
2266
2267typedef const unsigned char *fail_stack_elt_t;
2268
2269typedef struct
2270{
2271 fail_stack_elt_t *stack;
2272 unsigned size;
2273 unsigned avail; /* Offset of next open position. */
2274} fail_stack_type;
2275
2276#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
2277#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2278#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
2279#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
2280
2281
2282/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
2283
2284#define INIT_FAIL_STACK() \
2285 do { \
2286 fail_stack.stack = (fail_stack_elt_t *) \
2287 REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
2288 \
2289 if (fail_stack.stack == NULL) \
2290 return -2; \
2291 \
2292 fail_stack.size = INIT_FAILURE_ALLOC; \
2293 fail_stack.avail = 0; \
2294 } while (0)
2295
2296
2297/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2298
2299 Return 1 if succeeds, and 0 if either ran out of memory
2300 allocating space for it or it was already too large.
2301
2302 REGEX_REALLOCATE requires `destination' be declared. */
2303
2304#define DOUBLE_FAIL_STACK(fail_stack) \
2305 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
2306 ? 0 \
2307 : ((fail_stack).stack = (fail_stack_elt_t *) \
2308 REGEX_REALLOCATE ((fail_stack).stack, \
2309 (fail_stack).size * sizeof (fail_stack_elt_t), \
2310 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
2311 \
2312 (fail_stack).stack == NULL \
2313 ? 0 \
2314 : ((fail_stack).size <<= 1, \
2315 1)))
2316
2317
2318/* Push PATTERN_OP on FAIL_STACK.
2319
2320 Return 1 if was able to do so and 0 if ran out of memory allocating
2321 space to do so. */
2322#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
2323 ((FAIL_STACK_FULL () \
2324 && !DOUBLE_FAIL_STACK (fail_stack)) \
2325 ? 0 \
2326 : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
2327 1))
2328
2329/* This pushes an item onto the failure stack. Must be a four-byte
2330 value. Assumes the variable `fail_stack'. Probably should only
2331 be called from within `PUSH_FAILURE_POINT'. */
2332#define PUSH_FAILURE_ITEM(item) \
2333 fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2334
2335/* The complement operation. Assumes `fail_stack' is nonempty. */
2336#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2337
2338/* Used to omit pushing failure point id's when we're not debugging. */
2339#ifdef DEBUG
2340#define DEBUG_PUSH PUSH_FAILURE_ITEM
2341#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2342#else
2343#define DEBUG_PUSH(item)
2344#define DEBUG_POP(item_addr)
2345#endif
2346
2347
2348/* Push the information about the state we will need
2349 if we ever fail back to it.
2350
2351 Requires variables fail_stack, regstart, regend, reg_info, and
2352 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
2353 declared.
2354
2355 Does `return FAILURE_CODE' if runs out of memory. */
2356
2357#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
2358 do { \
2359 char *destination; \
2360 /* Must be int, so when we don't save any registers, the arithmetic \
2361 of 0 + -1 isn't done as unsigned. */ \
2362 int this_reg; \
2363 \
2364 DEBUG_STATEMENT (failure_id++); \
2365 DEBUG_STATEMENT (nfailure_points_pushed++); \
2366 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
2367 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
2368 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
2369 \
2370 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
2371 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
2372 \
2373 /* Ensure we have enough space allocated for what we will push. */ \
2374 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
2375 { \
2376 if (!DOUBLE_FAIL_STACK (fail_stack)) \
2377 return failure_code; \
2378 \
2379 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
2380 (fail_stack).size); \
2381 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2382 } \
2383 \
2384 /* Push the info, starting with the registers. */ \
2385 DEBUG_PRINT1 ("\n"); \
2386 \
2387 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
2388 this_reg++) \
2389 { \
2390 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
2391 DEBUG_STATEMENT (num_regs_pushed++); \
2392 \
2393 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2394 PUSH_FAILURE_ITEM (regstart[this_reg]); \
2395 \
2396 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2397 PUSH_FAILURE_ITEM (regend[this_reg]); \
2398 \
2399 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
2400 DEBUG_PRINT2 (" match_null=%d", \
2401 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
2402 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
2403 DEBUG_PRINT2 (" matched_something=%d", \
2404 MATCHED_SOMETHING (reg_info[this_reg])); \
2405 DEBUG_PRINT2 (" ever_matched=%d", \
2406 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
2407 DEBUG_PRINT1 ("\n"); \
2408 PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
2409 } \
2410 \
2411 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
2412 PUSH_FAILURE_ITEM (lowest_active_reg); \
2413 \
2414 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
2415 PUSH_FAILURE_ITEM (highest_active_reg); \
2416 \
2417 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
2418 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
2419 PUSH_FAILURE_ITEM (pattern_place); \
2420 \
2421 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
2422 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
2423 size2); \
2424 DEBUG_PRINT1 ("'\n"); \
2425 PUSH_FAILURE_ITEM (string_place); \
2426 \
2427 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
2428 DEBUG_PUSH (failure_id); \
2429 } while (0)
2430
2431/* This is the number of items that are pushed and popped on the stack
2432 for each register. */
2433#define NUM_REG_ITEMS 3
2434
2435/* Individual items aside from the registers. */
2436#ifdef DEBUG
2437#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
2438#else
2439#define NUM_NONREG_ITEMS 4
2440#endif
2441
2442/* We push at most this many items on the stack. */
2443#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2444
2445/* We actually push this many items. */
2446#define NUM_FAILURE_ITEMS \
2447 ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
2448 + NUM_NONREG_ITEMS)
2449
2450/* How many items can still be added to the stack without overflowing it. */
2451#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2452
2453
2454/* Pops what PUSH_FAIL_STACK pushes.
2455
2456 We restore into the parameters, all of which should be lvalues:
2457 STR -- the saved data position.
2458 PAT -- the saved pattern position.
2459 LOW_REG, HIGH_REG -- the highest and lowest active registers.
2460 REGSTART, REGEND -- arrays of string positions.
2461 REG_INFO -- array of information about each subexpression.
2462
2463 Also assumes the variables `fail_stack' and (if debugging), `bufp',
2464 `pend', `string1', `size1', `string2', and `size2'. */
2465
2466#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2467{ \
2468 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
2469 int this_reg; \
2470 const unsigned char *string_temp; \
2471 \
2472 assert (!FAIL_STACK_EMPTY ()); \
2473 \
2474 /* Remove failure points and point to how many regs pushed. */ \
2475 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
2476 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
2477 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
2478 \
2479 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
2480 \
2481 DEBUG_POP (&failure_id); \
2482 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
2483 \
2484 /* If the saved string location is NULL, it came from an \
2485 on_failure_keep_string_jump opcode, and we want to throw away the \
2486 saved NULL, thus retaining our current position in the string. */ \
2487 string_temp = POP_FAILURE_ITEM (); \
2488 if (string_temp != NULL) \
2489 str = (const char *) string_temp; \
2490 \
2491 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
2492 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
2493 DEBUG_PRINT1 ("'\n"); \
2494 \
2495 pat = (unsigned char *) POP_FAILURE_ITEM (); \
2496 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
2497 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
2498 \
2499 /* Restore register info. */ \
2500 high_reg = (unsigned) POP_FAILURE_ITEM (); \
2501 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
2502 \
2503 low_reg = (unsigned) POP_FAILURE_ITEM (); \
2504 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
2505 \
2506 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
2507 { \
2508 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
2509 \
2510 reg_info[this_reg].word = POP_FAILURE_ITEM (); \
2511 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
2512 \
2513 regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2514 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
2515 \
2516 regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
2517 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
2518 } \
2519 \
2520 DEBUG_STATEMENT (nfailure_points_popped++); \
2521} /* POP_FAILURE_POINT */
2522\f
2523/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2524 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
2525 characters can start a string that matches the pattern. This fastmap
2526 is used by re_search to skip quickly over impossible starting points.
2527
2528 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2529 area as BUFP->fastmap.
2530
2531 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2532 the pattern buffer.
2533
2534 Returns 0 if we succeed, -2 if an internal error. */
2535
2536int
2537re_compile_fastmap (bufp)
2538 struct re_pattern_buffer *bufp;
2539{
2540 int j, k;
2541 fail_stack_type fail_stack;
2542#ifndef REGEX_MALLOC
2543 char *destination;
2544#endif
2545 /* We don't push any register information onto the failure stack. */
2546 unsigned num_regs = 0;
2547
2548 register char *fastmap = bufp->fastmap;
2549 unsigned char *pattern = bufp->buffer;
2550 unsigned long size = bufp->used;
2551 const unsigned char *p = pattern;
2552 register unsigned char *pend = pattern + size;
2553
2554 /* Assume that each path through the pattern can be null until
2555 proven otherwise. We set this false at the bottom of switch
2556 statement, to which we get only if a particular path doesn't
2557 match the empty string. */
2558 boolean path_can_be_null = true;
2559
2560 /* We aren't doing a `succeed_n' to begin with. */
2561 boolean succeed_n_p = false;
2562
2563 assert (fastmap != NULL && p != NULL);
2564
2565 INIT_FAIL_STACK ();
2566 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
2567 bufp->fastmap_accurate = 1; /* It will be when we're done. */
2568 bufp->can_be_null = 0;
2569
2570 while (p != pend || !FAIL_STACK_EMPTY ())
2571 {
2572 if (p == pend)
2573 {
2574 bufp->can_be_null |= path_can_be_null;
2575
2576 /* Reset for next path. */
2577 path_can_be_null = true;
2578
2579 p = fail_stack.stack[--fail_stack.avail];
2580 }
2581
2582 /* We should never be about to go beyond the end of the pattern. */
2583 assert (p < pend);
2584
2585#ifdef SWITCH_ENUM_BUG
2586 switch ((int) ((re_opcode_t) *p++))
2587#else
2588 switch ((re_opcode_t) *p++)
2589#endif
2590 {
2591
2592 /* I guess the idea here is to simply not bother with a fastmap
2593 if a backreference is used, since it's too hard to figure out
2594 the fastmap for the corresponding group. Setting
2595 `can_be_null' stops `re_search_2' from using the fastmap, so
2596 that is all we do. */
2597 case duplicate:
2598 bufp->can_be_null = 1;
2599 return 0;
2600
2601
2602 /* Following are the cases which match a character. These end
2603 with `break'. */
2604
2605 case exactn:
2606 fastmap[p[1]] = 1;
2607 break;
2608
2609
2610 case charset:
2611 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2612 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2613 fastmap[j] = 1;
2614 break;
2615
2616
2617 case charset_not:
2618 /* Chars beyond end of map must be allowed. */
2619 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2620 fastmap[j] = 1;
2621
2622 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2623 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2624 fastmap[j] = 1;
2625 break;
2626
2627
2628 case wordchar:
2629 for (j = 0; j < (1 << BYTEWIDTH); j++)
2630 if (SYNTAX (j) == Sword)
2631 fastmap[j] = 1;
2632 break;
2633
2634
2635 case notwordchar:
2636 for (j = 0; j < (1 << BYTEWIDTH); j++)
2637 if (SYNTAX (j) != Sword)
2638 fastmap[j] = 1;
2639 break;
2640
2641
2642 case anychar:
2643 /* `.' matches anything ... */
2644 for (j = 0; j < (1 << BYTEWIDTH); j++)
2645 fastmap[j] = 1;
2646
2647 /* ... except perhaps newline. */
2648 if (!(bufp->syntax & RE_DOT_NEWLINE))
2649 fastmap['\n'] = 0;
2650
2651 /* Return if we have already set `can_be_null'; if we have,
2652 then the fastmap is irrelevant. Something's wrong here. */
2653 else if (bufp->can_be_null)
2654 return 0;
2655
2656 /* Otherwise, have to check alternative paths. */
2657 break;
2658
2659
2660#ifdef emacs
2661 case syntaxspec:
2662 k = *p++;
2663 for (j = 0; j < (1 << BYTEWIDTH); j++)
2664 if (SYNTAX (j) == (enum syntaxcode) k)
2665 fastmap[j] = 1;
2666 break;
2667
2668
2669 case notsyntaxspec:
2670 k = *p++;
2671 for (j = 0; j < (1 << BYTEWIDTH); j++)
2672 if (SYNTAX (j) != (enum syntaxcode) k)
2673 fastmap[j] = 1;
2674 break;
2675
2676
2677 /* All cases after this match the empty string. These end with
2678 `continue'. */
2679
2680
2681 case before_dot:
2682 case at_dot:
2683 case after_dot:
2684 continue;
2685#endif /* not emacs */
2686
2687
2688 case no_op:
2689 case begline:
2690 case endline:
2691 case begbuf:
2692 case endbuf:
2693 case wordbound:
2694 case notwordbound:
2695 case wordbeg:
2696 case wordend:
2697 case push_dummy_failure:
2698 continue;
2699
2700
2701 case jump_n:
2702 case pop_failure_jump:
2703 case maybe_pop_jump:
2704 case jump:
2705 case jump_past_alt:
2706 case dummy_failure_jump:
2707 EXTRACT_NUMBER_AND_INCR (j, p);
2708 p += j;
2709 if (j > 0)
2710 continue;
2711
2712 /* Jump backward implies we just went through the body of a
2713 loop and matched nothing. Opcode jumped to should be
2714 `on_failure_jump' or `succeed_n'. Just treat it like an
2715 ordinary jump. For a * loop, it has pushed its failure
2716 point already; if so, discard that as redundant. */
2717 if ((re_opcode_t) *p != on_failure_jump
2718 && (re_opcode_t) *p != succeed_n)
2719 continue;
2720
2721 p++;
2722 EXTRACT_NUMBER_AND_INCR (j, p);
2723 p += j;
2724
2725 /* If what's on the stack is where we are now, pop it. */
2726 if (!FAIL_STACK_EMPTY ()
2727 && fail_stack.stack[fail_stack.avail - 1] == p)
2728 fail_stack.avail--;
2729
2730 continue;
2731
2732
2733 case on_failure_jump:
2734 case on_failure_keep_string_jump:
2735 handle_on_failure_jump:
2736 EXTRACT_NUMBER_AND_INCR (j, p);
2737
2738 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2739 end of the pattern. We don't want to push such a point,
2740 since when we restore it above, entering the switch will
2741 increment `p' past the end of the pattern. We don't need
2742 to push such a point since we obviously won't find any more
2743 fastmap entries beyond `pend'. Such a pattern can match
2744 the null string, though. */
2745 if (p + j < pend)
2746 {
2747 if (!PUSH_PATTERN_OP (p + j, fail_stack))
2748 return -2;
2749 }
2750 else
2751 bufp->can_be_null = 1;
2752
2753 if (succeed_n_p)
2754 {
2755 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
2756 succeed_n_p = false;
2757 }
2758
2759 continue;
2760
2761
2762 case succeed_n:
2763 /* Get to the number of times to succeed. */
2764 p += 2;
2765
2766 /* Increment p past the n for when k != 0. */
2767 EXTRACT_NUMBER_AND_INCR (k, p);
2768 if (k == 0)
2769 {
2770 p -= 4;
2771 succeed_n_p = true; /* Spaghetti code alert. */
2772 goto handle_on_failure_jump;
2773 }
2774 continue;
2775
2776
2777 case set_number_at:
2778 p += 4;
2779 continue;
2780
2781
2782 case start_memory:
2783 case stop_memory:
2784 p += 2;
2785 continue;
2786
2787
2788 default:
2789 abort (); /* We have listed all the cases. */
2790 } /* switch *p++ */
2791
2792 /* Getting here means we have found the possible starting
2793 characters for one path of the pattern -- and that the empty
2794 string does not match. We need not follow this path further.
2795 Instead, look at the next alternative (remembered on the
2796 stack), or quit if no more. The test at the top of the loop
2797 does these things. */
2798 path_can_be_null = false;
2799 p = pend;
2800 } /* while p */
2801
2802 /* Set `can_be_null' for the last path (also the first path, if the
2803 pattern is empty). */
2804 bufp->can_be_null |= path_can_be_null;
2805 return 0;
2806} /* re_compile_fastmap */
2807\f
2808/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2809 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
2810 this memory for recording register information. STARTS and ENDS
2811 must be allocated using the malloc library routine, and must each
2812 be at least NUM_REGS * sizeof (regoff_t) bytes long.
2813
2814 If NUM_REGS == 0, then subsequent matches should allocate their own
2815 register data.
2816
2817 Unless this function is called, the first search or match using
2818 PATTERN_BUFFER will allocate its own register data, without
2819 freeing the old data. */
2820
2821void
2822re_set_registers (bufp, regs, num_regs, starts, ends)
2823 struct re_pattern_buffer *bufp;
2824 struct re_registers *regs;
2825 unsigned num_regs;
2826 regoff_t *starts, *ends;
2827{
2828 if (num_regs)
2829 {
2830 bufp->regs_allocated = REGS_REALLOCATE;
2831 regs->num_regs = num_regs;
2832 regs->start = starts;
2833 regs->end = ends;
2834 }
2835 else
2836 {
2837 bufp->regs_allocated = REGS_UNALLOCATED;
2838 regs->num_regs = 0;
2839 regs->start = regs->end = (regoff_t) 0;
2840 }
2841}
2842\f
2843/* Searching routines. */
2844
2845/* Like re_search_2, below, but only one string is specified, and
2846 doesn't let you say where to stop matching. */
2847
2848int
2849re_search (bufp, string, size, startpos, range, regs)
2850 struct re_pattern_buffer *bufp;
2851 const char *string;
2852 int size, startpos, range;
2853 struct re_registers *regs;
2854{
2855 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2856 regs, size);
2857}
2858
2859
2860/* Using the compiled pattern in BUFP->buffer, first tries to match the
2861 virtual concatenation of STRING1 and STRING2, starting first at index
2862 STARTPOS, then at STARTPOS + 1, and so on.
2863
2864 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2865
2866 RANGE is how far to scan while trying to match. RANGE = 0 means try
2867 only at STARTPOS; in general, the last start tried is STARTPOS +
2868 RANGE.
2869
2870 In REGS, return the indices of the virtual concatenation of STRING1
2871 and STRING2 that matched the entire BUFP->buffer and its contained
2872 subexpressions.
2873
2874 Do not consider matching one past the index STOP in the virtual
2875 concatenation of STRING1 and STRING2.
2876
2877 We return either the position in the strings at which the match was
2878 found, -1 if no match, or -2 if error (such as failure
2879 stack overflow). */
2880
2881int
2882re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2883 struct re_pattern_buffer *bufp;
2884 const char *string1, *string2;
2885 int size1, size2;
2886 int startpos;
2887 int range;
2888 struct re_registers *regs;
2889 int stop;
2890{
2891 int val;
2892 register char *fastmap = bufp->fastmap;
2893 register char *translate = bufp->translate;
2894 int total_size = size1 + size2;
2895 int endpos = startpos + range;
2896
2897 /* Check for out-of-range STARTPOS. */
2898 if (startpos < 0 || startpos > total_size)
2899 return -1;
2900
2901 /* Fix up RANGE if it might eventually take us outside
2902 the virtual concatenation of STRING1 and STRING2. */
2903 if (endpos < -1)
2904 range = -1 - startpos;
2905 else if (endpos > total_size)
2906 range = total_size - startpos;
2907
2908 /* If the search isn't to be a backwards one, don't waste time in a
2909 search for a pattern that must be anchored. */
2910 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2911 {
2912 if (startpos > 0)
2913 return -1;
2914 else
2915 range = 1;
2916 }
2917
2918 /* Update the fastmap now if not correct already. */
2919 if (fastmap && !bufp->fastmap_accurate)
2920 if (re_compile_fastmap (bufp) == -2)
2921 return -2;
2922
2923 /* Loop through the string, looking for a place to start matching. */
2924 for (;;)
2925 {
2926 /* If a fastmap is supplied, skip quickly over characters that
2927 cannot be the start of a match. If the pattern can match the
2928 null string, however, we don't need to skip characters; we want
2929 the first null string. */
2930 if (fastmap && startpos < total_size && !bufp->can_be_null)
2931 {
2932 if (range > 0) /* Searching forwards. */
2933 {
2934 register const char *d;
2935 register int lim = 0;
2936 int irange = range;
2937
2938 if (startpos < size1 && startpos + range >= size1)
2939 lim = range - (size1 - startpos);
2940
2941 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2942
2943 /* Written out as an if-else to avoid testing `translate'
2944 inside the loop. */
2945 if (translate)
2946 while (range > lim
2947 && !fastmap[(unsigned char)
2948 translate[(unsigned char) *d++]])
2949 range--;
2950 else
2951 while (range > lim && !fastmap[(unsigned char) *d++])
2952 range--;
2953
2954 startpos += irange - range;
2955 }
2956 else /* Searching backwards. */
2957 {
2958 register char c = (size1 == 0 || startpos >= size1
2959 ? string2[startpos - size1]
2960 : string1[startpos]);
2961
2962 if (!fastmap[(unsigned char) TRANSLATE (c)])
2963 goto advance;
2964 }
2965 }
2966
2967 /* If can't match the null string, and that's all we have left, fail. */
2968 if (range >= 0 && startpos == total_size && fastmap
2969 && !bufp->can_be_null)
2970 return -1;
2971
2972 val = re_match_2 (bufp, string1, size1, string2, size2,
2973 startpos, regs, stop);
2974 if (val >= 0)
2975 return startpos;
2976
2977 if (val == -2)
2978 return -2;
2979
2980 advance:
2981 if (!range)
2982 break;
2983 else if (range > 0)
2984 {
2985 range--;
2986 startpos++;
2987 }
2988 else
2989 {
2990 range++;
2991 startpos--;
2992 }
2993 }
2994 return -1;
2995} /* re_search_2 */
2996\f
2997/* Declarations and macros for re_match_2. */
2998
2999static int bcmp_translate ();
3000static boolean alt_match_null_string_p (),
3001 common_op_match_null_string_p (),
3002 group_match_null_string_p ();
3003
3004/* Structure for per-register (a.k.a. per-group) information.
3005 This must not be longer than one word, because we push this value
3006 onto the failure stack. Other register information, such as the
3007 starting and ending positions (which are addresses), and the list of
3008 inner groups (which is a bits list) are maintained in separate
3009 variables.
3010
3011 We are making a (strictly speaking) nonportable assumption here: that
3012 the compiler will pack our bit fields into something that fits into
3013 the type of `word', i.e., is something that fits into one item on the
3014 failure stack. */
3015typedef union
3016{
3017 fail_stack_elt_t word;
3018 struct
3019 {
3020 /* This field is one if this group can match the empty string,
3021 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
3022#define MATCH_NULL_UNSET_VALUE 3
3023 unsigned match_null_string_p : 2;
3024 unsigned is_active : 1;
3025 unsigned matched_something : 1;
3026 unsigned ever_matched_something : 1;
3027 } bits;
3028} register_info_type;
3029
3030#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
3031#define IS_ACTIVE(R) ((R).bits.is_active)
3032#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
3033#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
3034
3035
3036/* Call this when have matched a real character; it sets `matched' flags
3037 for the subexpressions which we are currently inside. Also records
3038 that those subexprs have matched. */
3039#define SET_REGS_MATCHED() \
3040 do \
3041 { \
3042 unsigned r; \
3043 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
3044 { \
3045 MATCHED_SOMETHING (reg_info[r]) \
3046 = EVER_MATCHED_SOMETHING (reg_info[r]) \
3047 = 1; \
3048 } \
3049 } \
3050 while (0)
3051
3052
3053/* This converts PTR, a pointer into one of the search strings `string1'
3054 and `string2' into an offset from the beginning of that string. */
3055#define POINTER_TO_OFFSET(ptr) \
3056 (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3057
3058/* Registers are set to a sentinel when they haven't yet matched. */
3059#define REG_UNSET_VALUE ((char *) -1)
3060#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3061
3062
3063/* Macros for dealing with the split strings in re_match_2. */
3064
3065#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
3066
3067/* Call before fetching a character with *d. This switches over to
3068 string2 if necessary. */
3069#define PREFETCH() \
3070 while (d == dend) \
3071 { \
3072 /* End of string2 => fail. */ \
3073 if (dend == end_match_2) \
3074 goto fail; \
3075 /* End of string1 => advance to string2. */ \
3076 d = string2; \
3077 dend = end_match_2; \
3078 }
3079
3080
3081/* Test if at very beginning or at very end of the virtual concatenation
3082 of `string1' and `string2'. If only one string, it's `string2'. */
3083#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3084#define AT_STRINGS_END(d) ((d) == end2)
3085
3086
3087/* Test if D points to a character which is word-constituent. We have
3088 two special cases to check for: if past the end of string1, look at
3089 the first character in string2; and if before the beginning of
3090 string2, look at the last character in string1. */
3091#define WORDCHAR_P(d) \
3092 (SYNTAX ((d) == end1 ? *string2 \
3093 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3094 == Sword)
3095
3096/* Test if the character before D and the one at D differ with respect
3097 to being word-constituent. */
3098#define AT_WORD_BOUNDARY(d) \
3099 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
3100 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3101
3102
3103/* Free everything we malloc. */
3104#ifdef REGEX_MALLOC
3105#define FREE_VAR(var) if (var) free (var); var = NULL
3106#define FREE_VARIABLES() \
3107 do { \
3108 FREE_VAR (fail_stack.stack); \
3109 FREE_VAR (regstart); \
3110 FREE_VAR (regend); \
3111 FREE_VAR (old_regstart); \
3112 FREE_VAR (old_regend); \
3113 FREE_VAR (best_regstart); \
3114 FREE_VAR (best_regend); \
3115 FREE_VAR (reg_info); \
3116 FREE_VAR (reg_dummy); \
3117 FREE_VAR (reg_info_dummy); \
3118 } while (0)
3119#else /* not REGEX_MALLOC */
3120/* Some MIPS systems (at least) want this to free alloca'd storage. */
3121#define FREE_VARIABLES() alloca (0)
3122#endif /* not REGEX_MALLOC */
3123
3124
3125/* These values must meet several constraints. They must not be valid
3126 register values; since we have a limit of 255 registers (because
3127 we use only one byte in the pattern for the register number), we can
3128 use numbers larger than 255. They must differ by 1, because of
3129 NUM_FAILURE_ITEMS above. And the value for the lowest register must
3130 be larger than the value for the highest register, so we do not try
3131 to actually save any registers when none are active. */
3132#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3133#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3134\f
3135/* Matching routines. */
3136
3137#ifndef emacs /* Emacs never uses this. */
3138/* re_match is like re_match_2 except it takes only a single string. */
3139
3140int
3141re_match (bufp, string, size, pos, regs)
3142 struct re_pattern_buffer *bufp;
3143 const char *string;
3144 int size, pos;
3145 struct re_registers *regs;
3146 {
3147 return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3148}
3149#endif /* not emacs */
3150
3151
3152/* re_match_2 matches the compiled pattern in BUFP against the
3153 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3154 and SIZE2, respectively). We start matching at POS, and stop
3155 matching at STOP.
3156
3157 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3158 store offsets for the substring each group matched in REGS. See the
3159 documentation for exactly how many groups we fill.
3160
3161 We return -1 if no match, -2 if an internal error (such as the
3162 failure stack overflowing). Otherwise, we return the length of the
3163 matched substring. */
3164
3165int
3166re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3167 struct re_pattern_buffer *bufp;
3168 const char *string1, *string2;
3169 int size1, size2;
3170 int pos;
3171 struct re_registers *regs;
3172 int stop;
3173{
3174 /* General temporaries. */
3175 int mcnt;
3176 unsigned char *p1;
3177
3178 /* Just past the end of the corresponding string. */
3179 const char *end1, *end2;
3180
3181 /* Pointers into string1 and string2, just past the last characters in
3182 each to consider matching. */
3183 const char *end_match_1, *end_match_2;
3184
3185 /* Where we are in the data, and the end of the current string. */
3186 const char *d, *dend;
3187
3188 /* Where we are in the pattern, and the end of the pattern. */
3189 unsigned char *p = bufp->buffer;
3190 register unsigned char *pend = p + bufp->used;
3191
3192 /* We use this to map every character in the string. */
3193 char *translate = bufp->translate;
3194
3195 /* Failure point stack. Each place that can handle a failure further
3196 down the line pushes a failure point on this stack. It consists of
3197 restart, regend, and reg_info for all registers corresponding to
3198 the subexpressions we're currently inside, plus the number of such
3199 registers, and, finally, two char *'s. The first char * is where
3200 to resume scanning the pattern; the second one is where to resume
3201 scanning the strings. If the latter is zero, the failure point is
3202 a ``dummy''; if a failure happens and the failure point is a dummy,
3203 it gets discarded and the next next one is tried. */
3204 fail_stack_type fail_stack;
3205#ifdef DEBUG
3206 static unsigned failure_id = 0;
3207 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3208#endif
3209
3210 /* We fill all the registers internally, independent of what we
3211 return, for use in backreferences. The number here includes
3212 an element for register zero. */
3213 unsigned num_regs = bufp->re_nsub + 1;
3214
3215 /* The currently active registers. */
3216 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3217 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3218
3219 /* Information on the contents of registers. These are pointers into
3220 the input strings; they record just what was matched (on this
3221 attempt) by a subexpression part of the pattern, that is, the
3222 regnum-th regstart pointer points to where in the pattern we began
3223 matching and the regnum-th regend points to right after where we
3224 stopped matching the regnum-th subexpression. (The zeroth register
3225 keeps track of what the whole pattern matches.) */
3226 const char **regstart, **regend;
3227
3228 /* If a group that's operated upon by a repetition operator fails to
3229 match anything, then the register for its start will need to be
3230 restored because it will have been set to wherever in the string we
3231 are when we last see its open-group operator. Similarly for a
3232 register's end. */
3233 const char **old_regstart, **old_regend;
3234
3235 /* The is_active field of reg_info helps us keep track of which (possibly
3236 nested) subexpressions we are currently in. The matched_something
3237 field of reg_info[reg_num] helps us tell whether or not we have
3238 matched any of the pattern so far this time through the reg_num-th
3239 subexpression. These two fields get reset each time through any
3240 loop their register is in. */
3241 register_info_type *reg_info;
3242
3243 /* The following record the register info as found in the above
3244 variables when we find a match better than any we've seen before.
3245 This happens as we backtrack through the failure points, which in
3246 turn happens only if we have not yet matched the entire string. */
3247 unsigned best_regs_set = false;
3248 const char **best_regstart, **best_regend;
3249
3250 /* Logically, this is `best_regend[0]'. But we don't want to have to
3251 allocate space for that if we're not allocating space for anything
3252 else (see below). Also, we never need info about register 0 for
3253 any of the other register vectors, and it seems rather a kludge to
3254 treat `best_regend' differently than the rest. So we keep track of
3255 the end of the best match so far in a separate variable. We
3256 initialize this to NULL so that when we backtrack the first time
3257 and need to test it, it's not garbage. */
3258 const char *match_end = NULL;
3259
3260 /* Used when we pop values we don't care about. */
3261 const char **reg_dummy;
3262 register_info_type *reg_info_dummy;
3263
3264#ifdef DEBUG
3265 /* Counts the total number of registers pushed. */
3266 unsigned num_regs_pushed = 0;
3267#endif
3268
3269 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3270
3271 INIT_FAIL_STACK ();
3272
3273 /* Do not bother to initialize all the register variables if there are
3274 no groups in the pattern, as it takes a fair amount of time. If
3275 there are groups, we include space for register 0 (the whole
3276 pattern), even though we never use it, since it simplifies the
3277 array indexing. We should fix this. */
3278 if (bufp->re_nsub)
3279 {
3280 regstart = REGEX_TALLOC (num_regs, const char *);
3281 regend = REGEX_TALLOC (num_regs, const char *);
3282 old_regstart = REGEX_TALLOC (num_regs, const char *);
3283 old_regend = REGEX_TALLOC (num_regs, const char *);
3284 best_regstart = REGEX_TALLOC (num_regs, const char *);
3285 best_regend = REGEX_TALLOC (num_regs, const char *);
3286 reg_info = REGEX_TALLOC (num_regs, register_info_type);
3287 reg_dummy = REGEX_TALLOC (num_regs, const char *);
3288 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3289
3290 if (!(regstart && regend && old_regstart && old_regend && reg_info
3291 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3292 {
3293 FREE_VARIABLES ();
3294 return -2;
3295 }
3296 }
3297#ifdef REGEX_MALLOC
3298 else
3299 {
3300 /* We must initialize all our variables to NULL, so that
3301 `FREE_VARIABLES' doesn't try to free them. */
3302 regstart = regend = old_regstart = old_regend = best_regstart
3303 = best_regend = reg_dummy = NULL;
3304 reg_info = reg_info_dummy = (register_info_type *) NULL;
3305 }
3306#endif /* REGEX_MALLOC */
3307
3308 /* The starting position is bogus. */
3309 if (pos < 0 || pos > size1 + size2)
3310 {
3311 FREE_VARIABLES ();
3312 return -1;
3313 }
3314
3315 /* Initialize subexpression text positions to -1 to mark ones that no
3316 start_memory/stop_memory has been seen for. Also initialize the
3317 register information struct. */
3318 for (mcnt = 1; mcnt < num_regs; mcnt++)
3319 {
3320 regstart[mcnt] = regend[mcnt]
3321 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3322
3323 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3324 IS_ACTIVE (reg_info[mcnt]) = 0;
3325 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3326 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3327 }
3328
3329 /* We move `string1' into `string2' if the latter's empty -- but not if
3330 `string1' is null. */
3331 if (size2 == 0 && string1 != NULL)
3332 {
3333 string2 = string1;
3334 size2 = size1;
3335 string1 = 0;
3336 size1 = 0;
3337 }
3338 end1 = string1 + size1;
3339 end2 = string2 + size2;
3340
3341 /* Compute where to stop matching, within the two strings. */
3342 if (stop <= size1)
3343 {
3344 end_match_1 = string1 + stop;
3345 end_match_2 = string2;
3346 }
3347 else
3348 {
3349 end_match_1 = end1;
3350 end_match_2 = string2 + stop - size1;
3351 }
3352
3353 /* `p' scans through the pattern as `d' scans through the data.
3354 `dend' is the end of the input string that `d' points within. `d'
3355 is advanced into the following input string whenever necessary, but
3356 this happens before fetching; therefore, at the beginning of the
3357 loop, `d' can be pointing at the end of a string, but it cannot
3358 equal `string2'. */
3359 if (size1 > 0 && pos <= size1)
3360 {
3361 d = string1 + pos;
3362 dend = end_match_1;
3363 }
3364 else
3365 {
3366 d = string2 + pos - size1;
3367 dend = end_match_2;
3368 }
3369
3370 DEBUG_PRINT1 ("The compiled pattern is: ");
3371 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3372 DEBUG_PRINT1 ("The string to match is: `");
3373 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3374 DEBUG_PRINT1 ("'\n");
3375
3376 /* This loops over pattern commands. It exits by returning from the
3377 function if the match is complete, or it drops through if the match
3378 fails at this starting point in the input data. */
3379 for (;;)
3380 {
3381 DEBUG_PRINT2 ("\n0x%x: ", p);
3382
3383 if (p == pend)
3384 { /* End of pattern means we might have succeeded. */
3385 DEBUG_PRINT1 ("end of pattern ... ");
3386
3387 /* If we haven't matched the entire string, and we want the
3388 longest match, try backtracking. */
3389 if (d != end_match_2)
3390 {
3391 DEBUG_PRINT1 ("backtracking.\n");
3392
3393 if (!FAIL_STACK_EMPTY ())
3394 { /* More failure points to try. */
3395 boolean same_str_p = (FIRST_STRING_P (match_end)
3396 == MATCHING_IN_FIRST_STRING);
3397
3398 /* If exceeds best match so far, save it. */
3399 if (!best_regs_set
3400 || (same_str_p && d > match_end)
3401 || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3402 {
3403 best_regs_set = true;
3404 match_end = d;
3405
3406 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3407
3408 for (mcnt = 1; mcnt < num_regs; mcnt++)
3409 {
3410 best_regstart[mcnt] = regstart[mcnt];
3411 best_regend[mcnt] = regend[mcnt];
3412 }
3413 }
3414 goto fail;
3415 }
3416
3417 /* If no failure points, don't restore garbage. */
3418 else if (best_regs_set)
3419 {
3420 restore_best_regs:
3421 /* Restore best match. It may happen that `dend ==
3422 end_match_1' while the restored d is in string2.
3423 For example, the pattern `x.*y.*z' against the
3424 strings `x-' and `y-z-', if the two strings are
3425 not consecutive in memory. */
3426 DEBUG_PRINT1 ("Restoring best registers.\n");
3427
3428 d = match_end;
3429 dend = ((d >= string1 && d <= end1)
3430 ? end_match_1 : end_match_2);
3431
3432 for (mcnt = 1; mcnt < num_regs; mcnt++)
3433 {
3434 regstart[mcnt] = best_regstart[mcnt];
3435 regend[mcnt] = best_regend[mcnt];
3436 }
3437 }
3438 } /* d != end_match_2 */
3439
3440 DEBUG_PRINT1 ("Accepting match.\n");
3441
3442 /* If caller wants register contents data back, do it. */
3443 if (regs && !bufp->no_sub)
3444 {
3445 /* Have the register data arrays been allocated? */
3446 if (bufp->regs_allocated == REGS_UNALLOCATED)
3447 { /* No. So allocate them with malloc. We need one
3448 extra element beyond `num_regs' for the `-1' marker
3449 GNU code uses. */
3450 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3451 regs->start = TALLOC (regs->num_regs, regoff_t);
3452 regs->end = TALLOC (regs->num_regs, regoff_t);
3453 if (regs->start == NULL || regs->end == NULL)
3454 return -2;
3455 bufp->regs_allocated = REGS_REALLOCATE;
3456 }
3457 else if (bufp->regs_allocated == REGS_REALLOCATE)
3458 { /* Yes. If we need more elements than were already
3459 allocated, reallocate them. If we need fewer, just
3460 leave it alone. */
3461 if (regs->num_regs < num_regs + 1)
3462 {
3463 regs->num_regs = num_regs + 1;
3464 RETALLOC (regs->start, regs->num_regs, regoff_t);
3465 RETALLOC (regs->end, regs->num_regs, regoff_t);
3466 if (regs->start == NULL || regs->end == NULL)
3467 return -2;
3468 }
3469 }
3470 else
3471 assert (bufp->regs_allocated == REGS_FIXED);
3472
3473 /* Convert the pointer data in `regstart' and `regend' to
3474 indices. Register zero has to be set differently,
3475 since we haven't kept track of any info for it. */
3476 if (regs->num_regs > 0)
3477 {
3478 regs->start[0] = pos;
3479 regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3480 : d - string2 + size1);
3481 }
3482
3483 /* Go through the first `min (num_regs, regs->num_regs)'
3484 registers, since that is all we initialized. */
3485 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3486 {
3487 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3488 regs->start[mcnt] = regs->end[mcnt] = -1;
3489 else
3490 {
3491 regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3492 regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3493 }
3494 }
3495
3496 /* If the regs structure we return has more elements than
3497 were in the pattern, set the extra elements to -1. If
3498 we (re)allocated the registers, this is the case,
3499 because we always allocate enough to have at least one
3500 -1 at the end. */
3501 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3502 regs->start[mcnt] = regs->end[mcnt] = -1;
3503 } /* regs && !bufp->no_sub */
3504
3505 FREE_VARIABLES ();
3506 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3507 nfailure_points_pushed, nfailure_points_popped,
3508 nfailure_points_pushed - nfailure_points_popped);
3509 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3510
3511 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3512 ? string1
3513 : string2 - size1);
3514
3515 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3516
3517 return mcnt;
3518 }
3519
3520 /* Otherwise match next pattern command. */
3521#ifdef SWITCH_ENUM_BUG
3522 switch ((int) ((re_opcode_t) *p++))
3523#else
3524 switch ((re_opcode_t) *p++)
3525#endif
3526 {
3527 /* Ignore these. Used to ignore the n of succeed_n's which
3528 currently have n == 0. */
3529 case no_op:
3530 DEBUG_PRINT1 ("EXECUTING no_op.\n");
3531 break;
3532
3533
3534 /* Match the next n pattern characters exactly. The following
3535 byte in the pattern defines n, and the n bytes after that
3536 are the characters to match. */
3537 case exactn:
3538 mcnt = *p++;
3539 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3540
3541 /* This is written out as an if-else so we don't waste time
3542 testing `translate' inside the loop. */
3543 if (translate)
3544 {
3545 do
3546 {
3547 PREFETCH ();
3548 if (translate[(unsigned char) *d++] != (char) *p++)
3549 goto fail;
3550 }
3551 while (--mcnt);
3552 }
3553 else
3554 {
3555 do
3556 {
3557 PREFETCH ();
3558 if (*d++ != (char) *p++) goto fail;
3559 }
3560 while (--mcnt);
3561 }
3562 SET_REGS_MATCHED ();
3563 break;
3564
3565
3566 /* Match any character except possibly a newline or a null. */
3567 case anychar:
3568 DEBUG_PRINT1 ("EXECUTING anychar.\n");
3569
3570 PREFETCH ();
3571
3572 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3573 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3574 goto fail;
3575
3576 SET_REGS_MATCHED ();
3577 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
3578 d++;
3579 break;
3580
3581
3582 case charset:
3583 case charset_not:
3584 {
3585 register unsigned char c;
3586 boolean not = (re_opcode_t) *(p - 1) == charset_not;
3587
3588 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3589
3590 PREFETCH ();
3591 c = TRANSLATE (*d); /* The character to match. */
3592
3593 /* Cast to `unsigned' instead of `unsigned char' in case the
3594 bit list is a full 32 bytes long. */
3595 if (c < (unsigned) (*p * BYTEWIDTH)
3596 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3597 not = !not;
3598
3599 p += 1 + *p;
3600
3601 if (!not) goto fail;
3602
3603 SET_REGS_MATCHED ();
3604 d++;
3605 break;
3606 }
3607
3608
3609 /* The beginning of a group is represented by start_memory.
3610 The arguments are the register number in the next byte, and the
3611 number of groups inner to this one in the next. The text
3612 matched within the group is recorded (in the internal
3613 registers data structure) under the register number. */
3614 case start_memory:
3615 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3616
3617 /* Find out if this group can match the empty string. */
3618 p1 = p; /* To send to group_match_null_string_p. */
3619
3620 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3621 REG_MATCH_NULL_STRING_P (reg_info[*p])
3622 = group_match_null_string_p (&p1, pend, reg_info);
3623
3624 /* Save the position in the string where we were the last time
3625 we were at this open-group operator in case the group is
3626 operated upon by a repetition operator, e.g., with `(a*)*b'
3627 against `ab'; then we want to ignore where we are now in
3628 the string in case this attempt to match fails. */
3629 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3630 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3631 : regstart[*p];
3632 DEBUG_PRINT2 (" old_regstart: %d\n",
3633 POINTER_TO_OFFSET (old_regstart[*p]));
3634
3635 regstart[*p] = d;
3636 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3637
3638 IS_ACTIVE (reg_info[*p]) = 1;
3639 MATCHED_SOMETHING (reg_info[*p]) = 0;
3640
3641 /* This is the new highest active register. */
3642 highest_active_reg = *p;
3643
3644 /* If nothing was active before, this is the new lowest active
3645 register. */
3646 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3647 lowest_active_reg = *p;
3648
3649 /* Move past the register number and inner group count. */
3650 p += 2;
3651 break;
3652
3653
3654 /* The stop_memory opcode represents the end of a group. Its
3655 arguments are the same as start_memory's: the register
3656 number, and the number of inner groups. */
3657 case stop_memory:
3658 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3659
3660 /* We need to save the string position the last time we were at
3661 this close-group operator in case the group is operated
3662 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3663 against `aba'; then we want to ignore where we are now in
3664 the string in case this attempt to match fails. */
3665 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3666 ? REG_UNSET (regend[*p]) ? d : regend[*p]
3667 : regend[*p];
3668 DEBUG_PRINT2 (" old_regend: %d\n",
3669 POINTER_TO_OFFSET (old_regend[*p]));
3670
3671 regend[*p] = d;
3672 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3673
3674 /* This register isn't active anymore. */
3675 IS_ACTIVE (reg_info[*p]) = 0;
3676
3677 /* If this was the only register active, nothing is active
3678 anymore. */
3679 if (lowest_active_reg == highest_active_reg)
3680 {
3681 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3682 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3683 }
3684 else
3685 { /* We must scan for the new highest active register, since
3686 it isn't necessarily one less than now: consider
3687 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
3688 new highest active register is 1. */
3689 unsigned char r = *p - 1;
3690 while (r > 0 && !IS_ACTIVE (reg_info[r]))
3691 r--;
3692
3693 /* If we end up at register zero, that means that we saved
3694 the registers as the result of an `on_failure_jump', not
3695 a `start_memory', and we jumped to past the innermost
3696 `stop_memory'. For example, in ((.)*) we save
3697 registers 1 and 2 as a result of the *, but when we pop
3698 back to the second ), we are at the stop_memory 1.
3699 Thus, nothing is active. */
3700 if (r == 0)
3701 {
3702 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3703 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3704 }
3705 else
3706 highest_active_reg = r;
3707 }
3708
3709 /* If just failed to match something this time around with a
3710 group that's operated on by a repetition operator, try to
3711 force exit from the ``loop'', and restore the register
3712 information for this group that we had before trying this
3713 last match. */
3714 if ((!MATCHED_SOMETHING (reg_info[*p])
3715 || (re_opcode_t) p[-3] == start_memory)
3716 && (p + 2) < pend)
3717 {
3718 boolean is_a_jump_n = false;
3719
3720 p1 = p + 2;
3721 mcnt = 0;
3722 switch ((re_opcode_t) *p1++)
3723 {
3724 case jump_n:
3725 is_a_jump_n = true;
3726 case pop_failure_jump:
3727 case maybe_pop_jump:
3728 case jump:
3729 case dummy_failure_jump:
3730 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3731 if (is_a_jump_n)
3732 p1 += 2;
3733 break;
3734
3735 default:
3736 /* do nothing */ ;
3737 }
3738 p1 += mcnt;
3739
3740 /* If the next operation is a jump backwards in the pattern
3741 to an on_failure_jump right before the start_memory
3742 corresponding to this stop_memory, exit from the loop
3743 by forcing a failure after pushing on the stack the
3744 on_failure_jump's jump in the pattern, and d. */
3745 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3746 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3747 {
3748 /* If this group ever matched anything, then restore
3749 what its registers were before trying this last
3750 failed match, e.g., with `(a*)*b' against `ab' for
3751 regstart[1], and, e.g., with `((a*)*(b*)*)*'
3752 against `aba' for regend[3].
3753
3754 Also restore the registers for inner groups for,
3755 e.g., `((a*)(b*))*' against `aba' (register 3 would
3756 otherwise get trashed). */
3757
3758 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3759 {
3760 unsigned r;
3761
3762 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3763
3764 /* Restore this and inner groups' (if any) registers. */
3765 for (r = *p; r < *p + *(p + 1); r++)
3766 {
3767 regstart[r] = old_regstart[r];
3768
3769 /* xx why this test? */
3770 if ((int) old_regend[r] >= (int) regstart[r])
3771 regend[r] = old_regend[r];
3772 }
3773 }
3774 p1++;
3775 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3776 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3777
3778 goto fail;
3779 }
3780 }
3781
3782 /* Move past the register number and the inner group count. */
3783 p += 2;
3784 break;
3785
3786
3787 /* \<digit> has been turned into a `duplicate' command which is
3788 followed by the numeric value of <digit> as the register number. */
3789 case duplicate:
3790 {
3791 register const char *d2, *dend2;
3792 int regno = *p++; /* Get which register to match against. */
3793 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3794
3795 /* Can't back reference a group which we've never matched. */
3796 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3797 goto fail;
3798
3799 /* Where in input to try to start matching. */
3800 d2 = regstart[regno];
3801
3802 /* Where to stop matching; if both the place to start and
3803 the place to stop matching are in the same string, then
3804 set to the place to stop, otherwise, for now have to use
3805 the end of the first string. */
3806
3807 dend2 = ((FIRST_STRING_P (regstart[regno])
3808 == FIRST_STRING_P (regend[regno]))
3809 ? regend[regno] : end_match_1);
3810 for (;;)
3811 {
3812 /* If necessary, advance to next segment in register
3813 contents. */
3814 while (d2 == dend2)
3815 {
3816 if (dend2 == end_match_2) break;
3817 if (dend2 == regend[regno]) break;
3818
3819 /* End of string1 => advance to string2. */
3820 d2 = string2;
3821 dend2 = regend[regno];
3822 }
3823 /* At end of register contents => success */
3824 if (d2 == dend2) break;
3825
3826 /* If necessary, advance to next segment in data. */
3827 PREFETCH ();
3828
3829 /* How many characters left in this segment to match. */
3830 mcnt = dend - d;
3831
3832 /* Want how many consecutive characters we can match in
3833 one shot, so, if necessary, adjust the count. */
3834 if (mcnt > dend2 - d2)
3835 mcnt = dend2 - d2;
3836
3837 /* Compare that many; failure if mismatch, else move
3838 past them. */
3839 if (translate
3840 ? bcmp_translate (d, d2, mcnt, translate)
3841 : bcmp (d, d2, mcnt))
3842 goto fail;
3843 d += mcnt, d2 += mcnt;
3844 }
3845 }
3846 break;
3847
3848
3849 /* begline matches the empty string at the beginning of the string
3850 (unless `not_bol' is set in `bufp'), and, if
3851 `newline_anchor' is set, after newlines. */
3852 case begline:
3853 DEBUG_PRINT1 ("EXECUTING begline.\n");
3854
3855 if (AT_STRINGS_BEG (d))
3856 {
3857 if (!bufp->not_bol) break;
3858 }
3859 else if (d[-1] == '\n' && bufp->newline_anchor)
3860 {
3861 break;
3862 }
3863 /* In all other cases, we fail. */
3864 goto fail;
3865
3866
3867 /* endline is the dual of begline. */
3868 case endline:
3869 DEBUG_PRINT1 ("EXECUTING endline.\n");
3870
3871 if (AT_STRINGS_END (d))
3872 {
3873 if (!bufp->not_eol) break;
3874 }
3875
3876 /* We have to ``prefetch'' the next character. */
3877 else if ((d == end1 ? *string2 : *d) == '\n'
3878 && bufp->newline_anchor)
3879 {
3880 break;
3881 }
3882 goto fail;
3883
3884
3885 /* Match at the very beginning of the data. */
3886 case begbuf:
3887 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3888 if (AT_STRINGS_BEG (d))
3889 break;
3890 goto fail;
3891
3892
3893 /* Match at the very end of the data. */
3894 case endbuf:
3895 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3896 if (AT_STRINGS_END (d))
3897 break;
3898 goto fail;
3899
3900
3901 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
3902 pushes NULL as the value for the string on the stack. Then
3903 `pop_failure_point' will keep the current value for the
3904 string, instead of restoring it. To see why, consider
3905 matching `foo\nbar' against `.*\n'. The .* matches the foo;
3906 then the . fails against the \n. But the next thing we want
3907 to do is match the \n against the \n; if we restored the
3908 string value, we would be back at the foo.
3909
3910 Because this is used only in specific cases, we don't need to
3911 check all the things that `on_failure_jump' does, to make
3912 sure the right things get saved on the stack. Hence we don't
3913 share its code. The only reason to push anything on the
3914 stack at all is that otherwise we would have to change
3915 `anychar's code to do something besides goto fail in this
3916 case; that seems worse than this. */
3917 case on_failure_keep_string_jump:
3918 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3919
3920 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3921 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3922
3923 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3924 break;
3925
3926
3927 /* Uses of on_failure_jump:
3928
3929 Each alternative starts with an on_failure_jump that points
3930 to the beginning of the next alternative. Each alternative
3931 except the last ends with a jump that in effect jumps past
3932 the rest of the alternatives. (They really jump to the
3933 ending jump of the following alternative, because tensioning
3934 these jumps is a hassle.)
3935
3936 Repeats start with an on_failure_jump that points past both
3937 the repetition text and either the following jump or
3938 pop_failure_jump back to this on_failure_jump. */
3939 case on_failure_jump:
3940 on_failure:
3941 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3942
3943 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3944 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3945
3946 /* If this on_failure_jump comes right before a group (i.e.,
3947 the original * applied to a group), save the information
3948 for that group and all inner ones, so that if we fail back
3949 to this point, the group's information will be correct.
3950 For example, in \(a*\)*\1, we need the preceding group,
3951 and in \(\(a*\)b*\)\2, we need the inner group. */
3952
3953 /* We can't use `p' to check ahead because we push
3954 a failure point to `p + mcnt' after we do this. */
3955 p1 = p;
3956
3957 /* We need to skip no_op's before we look for the
3958 start_memory in case this on_failure_jump is happening as
3959 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3960 against aba. */
3961 while (p1 < pend && (re_opcode_t) *p1 == no_op)
3962 p1++;
3963
3964 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3965 {
3966 /* We have a new highest active register now. This will
3967 get reset at the start_memory we are about to get to,
3968 but we will have saved all the registers relevant to
3969 this repetition op, as described above. */
3970 highest_active_reg = *(p1 + 1) + *(p1 + 2);
3971 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3972 lowest_active_reg = *(p1 + 1);
3973 }
3974
3975 DEBUG_PRINT1 (":\n");
3976 PUSH_FAILURE_POINT (p + mcnt, d, -2);
3977 break;
3978
3979
3980 /* A smart repeat ends with `maybe_pop_jump'.
3981 We change it to either `pop_failure_jump' or `jump'. */
3982 case maybe_pop_jump:
3983 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3984 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3985 {
3986 register unsigned char *p2 = p;
3987
3988 /* Compare the beginning of the repeat with what in the
3989 pattern follows its end. If we can establish that there
3990 is nothing that they would both match, i.e., that we
3991 would have to backtrack because of (as in, e.g., `a*a')
3992 then we can change to pop_failure_jump, because we'll
3993 never have to backtrack.
3994
3995 This is not true in the case of alternatives: in
3996 `(a|ab)*' we do need to backtrack to the `ab' alternative
3997 (e.g., if the string was `ab'). But instead of trying to
3998 detect that here, the alternative has put on a dummy
3999 failure point which is what we will end up popping. */
4000
4001 /* Skip over open/close-group commands. */
4002 while (p2 + 2 < pend
4003 && ((re_opcode_t) *p2 == stop_memory
4004 || (re_opcode_t) *p2 == start_memory))
4005 p2 += 3; /* Skip over args, too. */
4006
4007 /* If we're at the end of the pattern, we can change. */
4008 if (p2 == pend)
4009 {
4010 /* Consider what happens when matching ":\(.*\)"
4011 against ":/". I don't really understand this code
4012 yet. */
4013 p[-3] = (unsigned char) pop_failure_jump;
4014 DEBUG_PRINT1
4015 (" End of pattern: change to `pop_failure_jump'.\n");
4016 }
4017
4018 else if ((re_opcode_t) *p2 == exactn
4019 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4020 {
4021 register unsigned char c
4022 = *p2 == (unsigned char) endline ? '\n' : p2[2];
4023 p1 = p + mcnt;
4024
4025 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4026 to the `maybe_finalize_jump' of this case. Examine what
4027 follows. */
4028 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4029 {
4030 p[-3] = (unsigned char) pop_failure_jump;
4031 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4032 c, p1[5]);
4033 }
4034
4035 else if ((re_opcode_t) p1[3] == charset
4036 || (re_opcode_t) p1[3] == charset_not)
4037 {
4038 int not = (re_opcode_t) p1[3] == charset_not;
4039
4040 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4041 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4042 not = !not;
4043
4044 /* `not' is equal to 1 if c would match, which means
4045 that we can't change to pop_failure_jump. */
4046 if (!not)
4047 {
4048 p[-3] = (unsigned char) pop_failure_jump;
4049 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4050 }
4051 }
4052 }
4053 }
4054 p -= 2; /* Point at relative address again. */
4055 if ((re_opcode_t) p[-1] != pop_failure_jump)
4056 {
4057 p[-1] = (unsigned char) jump;
4058 DEBUG_PRINT1 (" Match => jump.\n");
4059 goto unconditional_jump;
4060 }
4061 /* Note fall through. */
4062
4063
4064 /* The end of a simple repeat has a pop_failure_jump back to
4065 its matching on_failure_jump, where the latter will push a
4066 failure point. The pop_failure_jump takes off failure
4067 points put on by this pop_failure_jump's matching
4068 on_failure_jump; we got through the pattern to here from the
4069 matching on_failure_jump, so didn't fail. */
4070 case pop_failure_jump:
4071 {
4072 /* We need to pass separate storage for the lowest and
4073 highest registers, even though we don't care about the
4074 actual values. Otherwise, we will restore only one
4075 register from the stack, since lowest will == highest in
4076 `pop_failure_point'. */
4077 unsigned dummy_low_reg, dummy_high_reg;
4078 unsigned char *pdummy;
4079 const char *sdummy;
4080
4081 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4082 POP_FAILURE_POINT (sdummy, pdummy,
4083 dummy_low_reg, dummy_high_reg,
4084 reg_dummy, reg_dummy, reg_info_dummy);
4085 }
4086 /* Note fall through. */
4087
4088
4089 /* Unconditionally jump (without popping any failure points). */
4090 case jump:
4091 unconditional_jump:
4092 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
4093 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4094 p += mcnt; /* Do the jump. */
4095 DEBUG_PRINT2 ("(to 0x%x).\n", p);
4096 break;
4097
4098
4099 /* We need this opcode so we can detect where alternatives end
4100 in `group_match_null_string_p' et al. */
4101 case jump_past_alt:
4102 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4103 goto unconditional_jump;
4104
4105
4106 /* Normally, the on_failure_jump pushes a failure point, which
4107 then gets popped at pop_failure_jump. We will end up at
4108 pop_failure_jump, also, and with a pattern of, say, `a+', we
4109 are skipping over the on_failure_jump, so we have to push
4110 something meaningless for pop_failure_jump to pop. */
4111 case dummy_failure_jump:
4112 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4113 /* It doesn't matter what we push for the string here. What
4114 the code at `fail' tests is the value for the pattern. */
4115 PUSH_FAILURE_POINT (0, 0, -2);
4116 goto unconditional_jump;
4117
4118
4119 /* At the end of an alternative, we need to push a dummy failure
4120 point in case we are followed by a `pop_failure_jump', because
4121 we don't want the failure point for the alternative to be
4122 popped. For example, matching `(a|ab)*' against `aab'
4123 requires that we match the `ab' alternative. */
4124 case push_dummy_failure:
4125 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4126 /* See comments just above at `dummy_failure_jump' about the
4127 two zeroes. */
4128 PUSH_FAILURE_POINT (0, 0, -2);
4129 break;
4130
4131 /* Have to succeed matching what follows at least n times.
4132 After that, handle like `on_failure_jump'. */
4133 case succeed_n:
4134 EXTRACT_NUMBER (mcnt, p + 2);
4135 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4136
4137 assert (mcnt >= 0);
4138 /* Originally, this is how many times we HAVE to succeed. */
4139 if (mcnt > 0)
4140 {
4141 mcnt--;
4142 p += 2;
4143 STORE_NUMBER_AND_INCR (p, mcnt);
4144 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
4145 }
4146 else if (mcnt == 0)
4147 {
4148 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
4149 p[2] = (unsigned char) no_op;
4150 p[3] = (unsigned char) no_op;
4151 goto on_failure;
4152 }
4153 break;
4154
4155 case jump_n:
4156 EXTRACT_NUMBER (mcnt, p + 2);
4157 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4158
4159 /* Originally, this is how many times we CAN jump. */
4160 if (mcnt)
4161 {
4162 mcnt--;
4163 STORE_NUMBER (p + 2, mcnt);
4164 goto unconditional_jump;
4165 }
4166 /* If don't have to jump any more, skip over the rest of command. */
4167 else
4168 p += 4;
4169 break;
4170
4171 case set_number_at:
4172 {
4173 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4174
4175 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4176 p1 = p + mcnt;
4177 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4178 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
4179 STORE_NUMBER (p1, mcnt);
4180 break;
4181 }
4182
4183 case wordbound:
4184 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4185 if (AT_WORD_BOUNDARY (d))
4186 break;
4187 goto fail;
4188
4189 case notwordbound:
4190 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4191 if (AT_WORD_BOUNDARY (d))
4192 goto fail;
4193 break;
4194
4195 case wordbeg:
4196 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4197 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4198 break;
4199 goto fail;
4200
4201 case wordend:
4202 DEBUG_PRINT1 ("EXECUTING wordend.\n");
4203 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4204 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4205 break;
4206 goto fail;
4207
4208#ifdef emacs
4209#ifdef emacs19
4210 case before_dot:
4211 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4212 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4213 goto fail;
4214 break;
4215
4216 case at_dot:
4217 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4218 if (PTR_CHAR_POS ((unsigned char *) d) != point)
4219 goto fail;
4220 break;
4221
4222 case after_dot:
4223 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4224 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4225 goto fail;
4226 break;
4227#else /* not emacs19 */
4228 case at_dot:
4229 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4230 if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4231 goto fail;
4232 break;
4233#endif /* not emacs19 */
4234
4235 case syntaxspec:
4236 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4237 mcnt = *p++;
4238 goto matchsyntax;
4239
4240 case wordchar:
4241 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4242 mcnt = (int) Sword;
4243 matchsyntax:
4244 PREFETCH ();
4245 if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4246 goto fail;
4247 SET_REGS_MATCHED ();
4248 break;
4249
4250 case notsyntaxspec:
4251 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4252 mcnt = *p++;
4253 goto matchnotsyntax;
4254
4255 case notwordchar:
4256 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4257 mcnt = (int) Sword;
4258 matchnotsyntax:
4259 PREFETCH ();
4260 if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4261 goto fail;
4262 SET_REGS_MATCHED ();
4263 break;
4264
4265#else /* not emacs */
4266 case wordchar:
4267 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4268 PREFETCH ();
4269 if (!WORDCHAR_P (d))
4270 goto fail;
4271 SET_REGS_MATCHED ();
4272 d++;
4273 break;
4274
4275 case notwordchar:
4276 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4277 PREFETCH ();
4278 if (WORDCHAR_P (d))
4279 goto fail;
4280 SET_REGS_MATCHED ();
4281 d++;
4282 break;
4283#endif /* not emacs */
4284
4285 default:
4286 abort ();
4287 }
4288 continue; /* Successfully executed one pattern command; keep going. */
4289
4290
4291 /* We goto here if a matching operation fails. */
4292 fail:
4293 if (!FAIL_STACK_EMPTY ())
4294 { /* A restart point is known. Restore to that state. */
4295 DEBUG_PRINT1 ("\nFAIL:\n");
4296 POP_FAILURE_POINT (d, p,
4297 lowest_active_reg, highest_active_reg,
4298 regstart, regend, reg_info);
4299
4300 /* If this failure point is a dummy, try the next one. */
4301 if (!p)
4302 goto fail;
4303
4304 /* If we failed to the end of the pattern, don't examine *p. */
4305 assert (p <= pend);
4306 if (p < pend)
4307 {
4308 boolean is_a_jump_n = false;
4309
4310 /* If failed to a backwards jump that's part of a repetition
4311 loop, need to pop this failure point and use the next one. */
4312 switch ((re_opcode_t) *p)
4313 {
4314 case jump_n:
4315 is_a_jump_n = true;
4316 case maybe_pop_jump:
4317 case pop_failure_jump:
4318 case jump:
4319 p1 = p + 1;
4320 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4321 p1 += mcnt;
4322
4323 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4324 || (!is_a_jump_n
4325 && (re_opcode_t) *p1 == on_failure_jump))
4326 goto fail;
4327 break;
4328 default:
4329 /* do nothing */ ;
4330 }
4331 }
4332
4333 if (d >= string1 && d <= end1)
4334 dend = end_match_1;
4335 }
4336 else
4337 break; /* Matching at this starting point really fails. */
4338 } /* for (;;) */
4339
4340 if (best_regs_set)
4341 goto restore_best_regs;
4342
4343 FREE_VARIABLES ();
4344
4345 return -1; /* Failure to match. */
4346} /* re_match_2 */
4347\f
4348/* Subroutine definitions for re_match_2. */
4349
4350
4351/* We are passed P pointing to a register number after a start_memory.
4352
4353 Return true if the pattern up to the corresponding stop_memory can
4354 match the empty string, and false otherwise.
4355
4356 If we find the matching stop_memory, sets P to point to one past its number.
4357 Otherwise, sets P to an undefined byte less than or equal to END.
4358
4359 We don't handle duplicates properly (yet). */
4360
4361static boolean
4362group_match_null_string_p (p, end, reg_info)
4363 unsigned char **p, *end;
4364 register_info_type *reg_info;
4365{
4366 int mcnt;
4367 /* Point to after the args to the start_memory. */
4368 unsigned char *p1 = *p + 2;
4369
4370 while (p1 < end)
4371 {
4372 /* Skip over opcodes that can match nothing, and return true or
4373 false, as appropriate, when we get to one that can't, or to the
4374 matching stop_memory. */
4375
4376 switch ((re_opcode_t) *p1)
4377 {
4378 /* Could be either a loop or a series of alternatives. */
4379 case on_failure_jump:
4380 p1++;
4381 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4382
4383 /* If the next operation is not a jump backwards in the
4384 pattern. */
4385
4386 if (mcnt >= 0)
4387 {
4388 /* Go through the on_failure_jumps of the alternatives,
4389 seeing if any of the alternatives cannot match nothing.
4390 The last alternative starts with only a jump,
4391 whereas the rest start with on_failure_jump and end
4392 with a jump, e.g., here is the pattern for `a|b|c':
4393
4394 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4395 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4396 /exactn/1/c
4397
4398 So, we have to first go through the first (n-1)
4399 alternatives and then deal with the last one separately. */
4400
4401
4402 /* Deal with the first (n-1) alternatives, which start
4403 with an on_failure_jump (see above) that jumps to right
4404 past a jump_past_alt. */
4405
4406 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4407 {
4408 /* `mcnt' holds how many bytes long the alternative
4409 is, including the ending `jump_past_alt' and
4410 its number. */
4411
4412 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4413 reg_info))
4414 return false;
4415
4416 /* Move to right after this alternative, including the
4417 jump_past_alt. */
4418 p1 += mcnt;
4419
4420 /* Break if it's the beginning of an n-th alternative
4421 that doesn't begin with an on_failure_jump. */
4422 if ((re_opcode_t) *p1 != on_failure_jump)
4423 break;
4424
4425 /* Still have to check that it's not an n-th
4426 alternative that starts with an on_failure_jump. */
4427 p1++;
4428 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4429 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4430 {
4431 /* Get to the beginning of the n-th alternative. */
4432 p1 -= 3;
4433 break;
4434 }
4435 }
4436
4437 /* Deal with the last alternative: go back and get number
4438 of the `jump_past_alt' just before it. `mcnt' contains
4439 the length of the alternative. */
4440 EXTRACT_NUMBER (mcnt, p1 - 2);
4441
4442 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4443 return false;
4444
4445 p1 += mcnt; /* Get past the n-th alternative. */
4446 } /* if mcnt > 0 */
4447 break;
4448
4449
4450 case stop_memory:
4451 assert (p1[1] == **p);
4452 *p = p1 + 2;
4453 return true;
4454
4455
4456 default:
4457 if (!common_op_match_null_string_p (&p1, end, reg_info))
4458 return false;
4459 }
4460 } /* while p1 < end */
4461
4462 return false;
4463} /* group_match_null_string_p */
4464
4465
4466/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4467 It expects P to be the first byte of a single alternative and END one
4468 byte past the last. The alternative can contain groups. */
4469
4470static boolean
4471alt_match_null_string_p (p, end, reg_info)
4472 unsigned char *p, *end;
4473 register_info_type *reg_info;
4474{
4475 int mcnt;
4476 unsigned char *p1 = p;
4477
4478 while (p1 < end)
4479 {
4480 /* Skip over opcodes that can match nothing, and break when we get
4481 to one that can't. */
4482
4483 switch ((re_opcode_t) *p1)
4484 {
4485 /* It's a loop. */
4486 case on_failure_jump:
4487 p1++;
4488 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4489 p1 += mcnt;
4490 break;
4491
4492 default:
4493 if (!common_op_match_null_string_p (&p1, end, reg_info))
4494 return false;
4495 }
4496 } /* while p1 < end */
4497
4498 return true;
4499} /* alt_match_null_string_p */
4500
4501
4502/* Deals with the ops common to group_match_null_string_p and
4503 alt_match_null_string_p.
4504
4505 Sets P to one after the op and its arguments, if any. */
4506
4507static boolean
4508common_op_match_null_string_p (p, end, reg_info)
4509 unsigned char **p, *end;
4510 register_info_type *reg_info;
4511{
4512 int mcnt;
4513 boolean ret;
4514 int reg_no;
4515 unsigned char *p1 = *p;
4516
4517 switch ((re_opcode_t) *p1++)
4518 {
4519 case no_op:
4520 case begline:
4521 case endline:
4522 case begbuf:
4523 case endbuf:
4524 case wordbeg:
4525 case wordend:
4526 case wordbound:
4527 case notwordbound:
4528#ifdef emacs
4529 case before_dot:
4530 case at_dot:
4531 case after_dot:
4532#endif
4533 break;
4534
4535 case start_memory:
4536 reg_no = *p1;
4537 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4538 ret = group_match_null_string_p (&p1, end, reg_info);
4539
4540 /* Have to set this here in case we're checking a group which
4541 contains a group and a back reference to it. */
4542
4543 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4544 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4545
4546 if (!ret)
4547 return false;
4548 break;
4549
4550 /* If this is an optimized succeed_n for zero times, make the jump. */
4551 case jump:
4552 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4553 if (mcnt >= 0)
4554 p1 += mcnt;
4555 else
4556 return false;
4557 break;
4558
4559 case succeed_n:
4560 /* Get to the number of times to succeed. */
4561 p1 += 2;
4562 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4563
4564 if (mcnt == 0)
4565 {
4566 p1 -= 4;
4567 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4568 p1 += mcnt;
4569 }
4570 else
4571 return false;
4572 break;
4573
4574 case duplicate:
4575 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4576 return false;
4577 break;
4578
4579 case set_number_at:
4580 p1 += 4;
4581
4582 default:
4583 /* All other opcodes mean we cannot match the empty string. */
4584 return false;
4585 }
4586
4587 *p = p1;
4588 return true;
4589} /* common_op_match_null_string_p */
4590
4591
4592/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4593 bytes; nonzero otherwise. */
4594
4595static int
4596bcmp_translate (s1, s2, len, translate)
4597 unsigned char *s1, *s2;
4598 register int len;
4599 char *translate;
4600{
4601 register unsigned char *p1 = s1, *p2 = s2;
4602 while (len)
4603 {
4604 if (translate[*p1++] != translate[*p2++]) return 1;
4605 len--;
4606 }
4607 return 0;
4608}
4609\f
4610/* Entry points for GNU code. */
4611
4612/* re_compile_pattern is the GNU regular expression compiler: it
4613 compiles PATTERN (of length SIZE) and puts the result in BUFP.
4614 Returns 0 if the pattern was valid, otherwise an error string.
4615
4616 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4617 are set in BUFP on entry.
4618
4619 We call regex_compile to do the actual compilation. */
4620
4621const char *
4622re_compile_pattern (pattern, length, bufp)
4623 const char *pattern;
4624 int length;
4625 struct re_pattern_buffer *bufp;
4626{
4627 reg_errcode_t ret;
4628
4629 /* GNU code is written to assume at least RE_NREGS registers will be set
4630 (and at least one extra will be -1). */
4631 bufp->regs_allocated = REGS_UNALLOCATED;
4632
4633 /* And GNU code determines whether or not to get register information
4634 by passing null for the REGS argument to re_match, etc., not by
4635 setting no_sub. */
4636 bufp->no_sub = 0;
4637
4638 /* Match anchors at newline. */
4639 bufp->newline_anchor = 1;
4640
4641 ret = regex_compile (pattern, length, re_syntax_options, bufp);
4642
4643 return re_error_msg[(int) ret];
4644}
4645\f
4646/* Entry points compatible with 4.2 BSD regex library. We don't define
4647 them if this is an Emacs or POSIX compilation. */
4648
4649#if !defined (emacs) && !defined (_POSIX_SOURCE)
4650
4651/* BSD has one and only one pattern buffer. */
4652static struct re_pattern_buffer re_comp_buf;
4653
4654char *
4655re_comp (s)
4656 const char *s;
4657{
4658 reg_errcode_t ret;
4659
4660 if (!s)
4661 {
4662 if (!re_comp_buf.buffer)
4663 return "No previous regular expression";
4664 return 0;
4665 }
4666
4667 if (!re_comp_buf.buffer)
4668 {
4669 re_comp_buf.buffer = (unsigned char *) malloc (200);
4670 if (re_comp_buf.buffer == NULL)
4671 return "Memory exhausted";
4672 re_comp_buf.allocated = 200;
4673
4674 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4675 if (re_comp_buf.fastmap == NULL)
4676 return "Memory exhausted";
4677 }
4678
4679 /* Since `re_exec' always passes NULL for the `regs' argument, we
4680 don't need to initialize the pattern buffer fields which affect it. */
4681
4682 /* Match anchors at newlines. */
4683 re_comp_buf.newline_anchor = 1;
4684
4685 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4686
4687 /* Yes, we're discarding `const' here. */
4688 return (char *) re_error_msg[(int) ret];
4689}
4690
4691
4692int
4693re_exec (s)
4694 const char *s;
4695{
4696 const int len = strlen (s);
4697 return
4698 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4699}
4700#endif /* not emacs and not _POSIX_SOURCE */
4701\f
4702/* POSIX.2 functions. Don't define these for Emacs. */
4703
4704#ifndef emacs
4705
4706/* regcomp takes a regular expression as a string and compiles it.
4707
4708 PREG is a regex_t *. We do not expect any fields to be initialized,
4709 since POSIX says we shouldn't. Thus, we set
4710
4711 `buffer' to the compiled pattern;
4712 `used' to the length of the compiled pattern;
4713 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4714 REG_EXTENDED bit in CFLAGS is set; otherwise, to
4715 RE_SYNTAX_POSIX_BASIC;
4716 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4717 `fastmap' and `fastmap_accurate' to zero;
4718 `re_nsub' to the number of subexpressions in PATTERN.
4719
4720 PATTERN is the address of the pattern string.
4721
4722 CFLAGS is a series of bits which affect compilation.
4723
4724 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4725 use POSIX basic syntax.
4726
4727 If REG_NEWLINE is set, then . and [^...] don't match newline.
4728 Also, regexec will try a match beginning after every newline.
4729
4730 If REG_ICASE is set, then we considers upper- and lowercase
4731 versions of letters to be equivalent when matching.
4732
4733 If REG_NOSUB is set, then when PREG is passed to regexec, that
4734 routine will report only success or failure, and nothing about the
4735 registers.
4736
4737 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
4738 the return codes and their meanings.) */
4739
4740int
4741regcomp (preg, pattern, cflags)
4742 regex_t *preg;
4743 const char *pattern;
4744 int cflags;
4745{
4746 reg_errcode_t ret;
4747 unsigned syntax
4748 = (cflags & REG_EXTENDED) ?
4749 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4750
4751 /* regex_compile will allocate the space for the compiled pattern. */
4752 preg->buffer = 0;
4753 preg->allocated = 0;
4754
4755 /* Don't bother to use a fastmap when searching. This simplifies the
4756 REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4757 characters after newlines into the fastmap. This way, we just try
4758 every character. */
4759 preg->fastmap = 0;
4760
4761 if (cflags & REG_ICASE)
4762 {
4763 unsigned i;
4764
4765 preg->translate = (char *) malloc (CHAR_SET_SIZE);
4766 if (preg->translate == NULL)
4767 return (int) REG_ESPACE;
4768
4769 /* Map uppercase characters to corresponding lowercase ones. */
4770 for (i = 0; i < CHAR_SET_SIZE; i++)
4771 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4772 }
4773 else
4774 preg->translate = NULL;
4775
4776 /* If REG_NEWLINE is set, newlines are treated differently. */
4777 if (cflags & REG_NEWLINE)
4778 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
4779 syntax &= ~RE_DOT_NEWLINE;
4780 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4781 /* It also changes the matching behavior. */
4782 preg->newline_anchor = 1;
4783 }
4784 else
4785 preg->newline_anchor = 0;
4786
4787 preg->no_sub = !!(cflags & REG_NOSUB);
4788
4789 /* POSIX says a null character in the pattern terminates it, so we
4790 can use strlen here in compiling the pattern. */
4791 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4792
4793 /* POSIX doesn't distinguish between an unmatched open-group and an
4794 unmatched close-group: both are REG_EPAREN. */
4795 if (ret == REG_ERPAREN) ret = REG_EPAREN;
4796
4797 return (int) ret;
4798}
4799
4800
4801/* regexec searches for a given pattern, specified by PREG, in the
4802 string STRING.
4803
4804 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4805 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
4806 least NMATCH elements, and we set them to the offsets of the
4807 corresponding matched substrings.
4808
4809 EFLAGS specifies `execution flags' which affect matching: if
4810 REG_NOTBOL is set, then ^ does not match at the beginning of the
4811 string; if REG_NOTEOL is set, then $ does not match at the end.
4812
4813 We return 0 if we find a match and REG_NOMATCH if not. */
4814
4815int
4816regexec (preg, string, nmatch, pmatch, eflags)
4817 const regex_t *preg;
4818 const char *string;
4819 size_t nmatch;
4820 regmatch_t pmatch[];
4821 int eflags;
4822{
4823 int ret;
4824 struct re_registers regs;
4825 regex_t private_preg;
4826 int len = strlen (string);
4827 boolean want_reg_info = !preg->no_sub && nmatch > 0;
4828
4829 private_preg = *preg;
4830
4831 private_preg.not_bol = !!(eflags & REG_NOTBOL);
4832 private_preg.not_eol = !!(eflags & REG_NOTEOL);
4833
4834 /* The user has told us exactly how many registers to return
4835 information about, via `nmatch'. We have to pass that on to the
4836 matching routines. */
4837 private_preg.regs_allocated = REGS_FIXED;
4838
4839 if (want_reg_info)
4840 {
4841 regs.num_regs = nmatch;
4842 regs.start = TALLOC (nmatch, regoff_t);
4843 regs.end = TALLOC (nmatch, regoff_t);
4844 if (regs.start == NULL || regs.end == NULL)
4845 return (int) REG_NOMATCH;
4846 }
4847
4848 /* Perform the searching operation. */
4849 ret = re_search (&private_preg, string, len,
4850 /* start: */ 0, /* range: */ len,
4851 want_reg_info ? &regs : (struct re_registers *) 0);
4852
4853 /* Copy the register information to the POSIX structure. */
4854 if (want_reg_info)
4855 {
4856 if (ret >= 0)
4857 {
4858 unsigned r;
4859
4860 for (r = 0; r < nmatch; r++)
4861 {
4862 pmatch[r].rm_so = regs.start[r];
4863 pmatch[r].rm_eo = regs.end[r];
4864 }
4865 }
4866
4867 /* If we needed the temporary register info, free the space now. */
4868 free (regs.start);
4869 free (regs.end);
4870 }
4871
4872 /* We want zero return to mean success, unlike `re_search'. */
4873 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4874}
4875
4876
4877/* Returns a message corresponding to an error code, ERRCODE, returned
4878 from either regcomp or regexec. We don't use PREG here. */
4879
4880size_t
4881regerror (errcode, preg, errbuf, errbuf_size)
4882 int errcode;
4883 const regex_t *preg;
4884 char *errbuf;
4885 size_t errbuf_size;
4886{
4887 const char *msg;
4888 size_t msg_size;
4889
4890 if (errcode < 0
4891 || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4892 /* Only error codes returned by the rest of the code should be passed
4893 to this routine. If we are given anything else, or if other regex
4894 code generates an invalid error code, then the program has a bug.
4895 Dump core so we can fix it. */
4896 abort ();
4897
4898 msg = re_error_msg[errcode];
4899
4900 /* POSIX doesn't require that we do anything in this case, but why
4901 not be nice. */
4902 if (! msg)
4903 msg = "Success";
4904
4905 msg_size = strlen (msg) + 1; /* Includes the null. */
4906
4907 if (errbuf_size != 0)
4908 {
4909 if (msg_size > errbuf_size)
4910 {
4911 strncpy (errbuf, msg, errbuf_size - 1);
4912 errbuf[errbuf_size - 1] = 0;
4913 }
4914 else
4915 strcpy (errbuf, msg);
4916 }
4917
4918 return msg_size;
4919}
4920
4921
4922/* Free dynamically allocated space used by PREG. */
4923
4924void
4925regfree (preg)
4926 regex_t *preg;
4927{
4928 if (preg->buffer != NULL)
4929 free (preg->buffer);
4930 preg->buffer = NULL;
4931
4932 preg->allocated = 0;
4933 preg->used = 0;
4934
4935 if (preg->fastmap != NULL)
4936 free (preg->fastmap);
4937 preg->fastmap = NULL;
4938 preg->fastmap_accurate = 0;
4939
4940 if (preg->translate != NULL)
4941 free (preg->translate);
4942 preg->translate = NULL;
4943}
4944
4945#endif /* not emacs */
4946\f
4947/*
4948Local variables:
4949make-backup-files: t
4950version-control: t
4951trim-versions-without-asking: nil
4952End:
4953*/