/*
- * $Id: GNUregex.h,v 1.8 2003/01/23 00:36:47 robertc Exp $
+ * $Id: GNUregex.h,v 1.9 2003/08/03 22:53:47 hno Exp $
*/
#ifndef SQUID_REGEXP_LIBRARY_H
* If not set, then an unmatched ) is invalid. */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
-/* This global variable defines the particular regexp syntax to use (for
- * some interfaces). When a regexp is compiled, the syntax used is
- * stored in the pattern buffer, so changing this does not affect
- * already-compiled regexps. */
-extern reg_syntax_t re_syntax_options;
\f
/* Define combinations of the above bits for the standard possibilities.
* (The [[[ comments delimit what gets put into the Texinfo file, so
#endif /* not __STDC__ */
-/* Sets the current default syntax to SYNTAX, and return the old syntax.
- * You can also simply assign to the `re_syntax_options' variable. */
-extern reg_syntax_t re_set_syntax _RE_ARGS((reg_syntax_t syntax));
-
-/* Compile the regular expression PATTERN, with length LENGTH
- * and syntax given by the global `re_syntax_options', into the buffer
- * BUFFER. Return NULL if successful, and an error string if not. */
-extern const char *re_compile_pattern
- _RE_ARGS((const char *pattern, int length,
- struct re_pattern_buffer * buffer));
-
-
-/* Compile a fastmap for the compiled pattern in BUFFER; used to
- * accelerate searches. Return 0 if successful and -2 if was an
- * internal error. */
-extern int re_compile_fastmap _RE_ARGS((struct re_pattern_buffer * buffer));
-
-
-/* Search in the string STRING (with length LENGTH) for the pattern
- * compiled into BUFFER. Start searching at position START, for RANGE
- * characters. Return the starting position of the match, -1 for no
- * match, or -2 for an internal error. Also return register
- * information in REGS (if REGS and BUFFER->no_sub are nonzero). */
-extern int re_search
- _RE_ARGS((struct re_pattern_buffer * buffer, const char *string,
- int length, int start, int range, struct re_registers * regs));
-
-
-/* Like `re_search', but search in the concatenation of STRING1 and
- * STRING2. Also, stop searching at index START + STOP. */
-extern int re_search_2
- _RE_ARGS((struct re_pattern_buffer * buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, int range, struct re_registers * regs, int stop));
-
-
-/* Like `re_search', but return how many characters in STRING the regexp
- * in BUFFER matched, starting at position START. */
-extern int re_match
- _RE_ARGS((struct re_pattern_buffer * buffer, const char *string,
- int length, int start, struct re_registers * regs));
-
-
-/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
-extern int re_match_2
- _RE_ARGS((struct re_pattern_buffer * buffer, const char *string1,
- int length1, const char *string2, int length2,
- int start, struct re_registers * regs, int stop));
-
-
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
- * ENDS. Subsequent matches using BUFFER and REGS will use this memory
- * for recording register information. STARTS and ENDS must be
- * allocated with malloc, and must each be at least `NUM_REGS * sizeof
- * (regoff_t)' bytes long.
- *
- * If NUM_REGS == 0, then subsequent matches should allocate their own
- * register data.
- *
- * Unless this function is called, the first search or match using
- * PATTERN_BUFFER will allocate its own register data, without
- * freeing the old data. */
-extern void re_set_registers
- _RE_ARGS((struct re_pattern_buffer * buffer, struct re_registers * regs,
- unsigned num_regs, regoff_t * starts, regoff_t * ends));
-
-/* 4.2 bsd compatibility. */
-extern char *re_comp _RE_ARGS((const char *));
-extern int re_exec _RE_ARGS((const char *));
-
/* POSIX compatibility. */
extern int regcomp _RE_ARGS((regex_t * preg, const char *pattern, int cflags));
extern int regexec
/*
- * $Id: GNUregex.c,v 1.15 2003/06/20 00:05:11 hno Exp $
+ * $Id: GNUregex.c,v 1.16 2003/08/03 22:53:47 hno Exp $
*/
/* Extended regular expression matching and search library,
#define REGEX_MALLOC 1
#endif
-/* The `emacs' switch turns on certain matching commands
- * that make sense only in Emacs. */
-#ifdef emacs
-
-#include "lisp.h"
-#include "buffer.h"
-#include "syntax.h"
-
-/* Emacs uses `NULL' as a predicate. */
-#undef NULL
-
-#else /* not emacs */
-
/* We used to test for `BSTRING' here, but only GCC and Emacs define
* `BSTRING', as far as I know, and neither of them use this code. */
#if HAVE_STRING_H || STDC_HEADERS
#define SYNTAX(c) re_syntax_table[c]
-#endif /* not emacs */
\f
/* Get the interface, including the syntax bits. */
#include "GNUregex.h"
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ * accelerate searches. Return 0 if successful and -2 if was an
+ * internal error. */
+static int re_compile_fastmap _RE_ARGS((struct re_pattern_buffer * buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ * compiled into BUFFER. Start searching at position START, for RANGE
+ * characters. Return the starting position of the match, -1 for no
+ * match, or -2 for an internal error. Also return register
+ * information in REGS (if REGS and BUFFER->no_sub are nonzero). */
+static int re_search
+ _RE_ARGS((struct re_pattern_buffer * buffer, const char *string,
+ int length, int start, int range, struct re_registers * regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ * STRING2. Also, stop searching at index START + STOP. */
+static int re_search_2
+ _RE_ARGS((struct re_pattern_buffer * buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers * regs, int stop));
+
+
+/* Like `re_search_2', but return how many characters in STRING the regexp
+ * in BUFFER matched, starting at position START. */
+static int re_match_2
+ _RE_ARGS((struct re_pattern_buffer * buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers * regs, int stop));
+
+
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
/* Fail unless at end of line. */
endline,
- /* Succeeds if at beginning of buffer (if emacs) or at beginning
- * of string to be matched (if not). */
+ /* Succeeds if or at beginning of string to be matched. */
begbuf,
/* Analogously, for end of buffer/string. */
wordbound, /* Succeeds if at a word boundary. */
notwordbound /* Succeeds if not at a word boundary. */
-#ifdef emacs
- ,before_dot, /* Succeeds if before point. */
- at_dot, /* Succeeds if at point. */
- after_dot, /* Succeeds if after point. */
-
- /* Matches any character whose syntax is specified. Followed by
- * a byte which contains a syntax code, e.g., Sword. */
- syntaxspec,
-
- /* Matches any character whose syntax is not that specified. */
- notsyntaxspec
-#endif /* emacs */
} re_opcode_t;
\f
/* Common operations on the compiled pattern. */
case wordend:
printf("/wordend");
-#ifdef emacs
- case before_dot:
- printf("/before_dot");
- break;
-
- case at_dot:
- printf("/at_dot");
- break;
-
- case after_dot:
- printf("/after_dot");
- break;
-
- case syntaxspec:
- printf("/syntaxspec");
- mcnt = *p++;
- printf("/%d", mcnt);
- break;
-
- case notsyntaxspec:
- printf("/notsyntaxspec");
- mcnt = *p++;
- printf("/%d", mcnt);
- break;
-#endif /* emacs */
-
case wordchar:
printf("/wordchar");
break;
#endif /* not DEBUG */
\f
-/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
- * also be assigned to arbitrarily: each pattern buffer stores its own
- * syntax, so it can be changed between regex compilations. */
-reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
-
-
-/* Specify the precise syntax of regexps for compilation. This provides
- * for compatibility for various utilities which historically have
- * different, incompatible syntaxes.
- *
- * The argument SYNTAX is a bit mask comprised of the various bits
- * defined in regex.h. We return the old syntax. */
-
-reg_syntax_t
-re_set_syntax(syntax)
- reg_syntax_t syntax;
-{
- reg_syntax_t ret = re_syntax_options;
-
- re_syntax_options = syntax;
- return ret;
-}
-\f
/* This table gives an error message for each of the error codes listed
* in regex.h. Obviously the order here has to be same as there. */
/* Always count groups, whether or not bufp->no_sub is set. */
bufp->re_nsub = 0;
-#if !defined (emacs) && !defined (SYNTAX_TABLE)
+#if !defined (SYNTAX_TABLE)
/* Initialize the syntax table. */
init_syntax_once();
#endif
}
goto normal_char;
-#ifdef emacs
- /* There is no way to specify the before_dot and after_dot
- * operators. rms says this is ok. --karl */
- case '=':
- BUF_PUSH(at_dot);
- break;
-
- case 's':
- laststart = b;
- PATFETCH(c);
- BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
- break;
-
- case 'S':
- laststart = b;
- PATFETCH(c);
- BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
- break;
-#endif /* emacs */
-
case 'w':
laststart = b;
break;
-#ifdef emacs
- case syntaxspec:
- k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX(j) == (enum syntaxcode) k)
- fastmap[j] = 1;
- break;
-
-
- case notsyntaxspec:
- k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX(j) != (enum syntaxcode) k)
- fastmap[j] = 1;
- break;
-
-
- /* All cases after this match the empty string. These end with
- * `continue'. */
-
-
- case before_dot:
- case at_dot:
- case after_dot:
- continue;
-#endif /* not emacs */
-
-
case no_op:
case begline:
case endline:
return 0;
} /* re_compile_fastmap */
\f
-/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
- * ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
- * this memory for recording register information. STARTS and ENDS
- * must be allocated using the malloc library routine, and must each
- * be at least NUM_REGS * sizeof (regoff_t) bytes long.
- *
- * If NUM_REGS == 0, then subsequent matches should allocate their own
- * register data.
- *
- * Unless this function is called, the first search or match using
- * PATTERN_BUFFER will allocate its own register data, without
- * freeing the old data. */
-
-void
-re_set_registers(bufp, regs, num_regs, starts, ends)
- struct re_pattern_buffer *bufp;
- struct re_registers *regs;
- unsigned num_regs;
- regoff_t *starts, *ends;
-{
- if (num_regs) {
- bufp->regs_allocated = REGS_REALLOCATE;
- regs->num_regs = num_regs;
- regs->start = starts;
- regs->end = ends;
- } else {
- bufp->regs_allocated = REGS_UNALLOCATED;
- regs->num_regs = 0;
- regs->start = regs->end = (regoff_t) 0;
- }
-}
-\f
/* Searching routines. */
/* Like re_search_2, below, but only one string is specified, and
* doesn't let you say where to stop matching. */
-int
+static int
re_search(bufp, string, size, startpos, range, regs)
struct re_pattern_buffer *bufp;
const char *string;
* found, -1 if no match, or -2 if error (such as failure
* stack overflow). */
-int
+static int
re_search_2(bufp, string1, size1, string2, size2, startpos, range, regs, stop)
struct re_pattern_buffer *bufp;
const char *string1, *string2;
\f
/* Matching routines. */
-#ifndef emacs /* Emacs never uses this. */
-/* re_match is like re_match_2 except it takes only a single string. */
-
-int
-re_match(bufp, string, size, pos, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int size, pos;
- struct re_registers *regs;
-{
- return re_match_2(bufp, NULL, 0, string, size, pos, regs, size);
-}
-#endif /* not emacs */
-
-
/* re_match_2 matches the compiled pattern in BUFP against the
* the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
* and SIZE2, respectively). We start matching at POS, and stop
break;
goto fail;
-#ifdef emacs
-#ifdef emacs19
- case before_dot:
- DEBUG_PRINT1("EXECUTING before_dot.\n");
- if (PTR_CHAR_POS((unsigned char *) d) >= point)
- goto fail;
- break;
-
- case at_dot:
- DEBUG_PRINT1("EXECUTING at_dot.\n");
- if (PTR_CHAR_POS((unsigned char *) d) != point)
- goto fail;
- break;
-
- case after_dot:
- DEBUG_PRINT1("EXECUTING after_dot.\n");
- if (PTR_CHAR_POS((unsigned char *) d) <= point)
- goto fail;
- break;
-#else /* not emacs19 */
- case at_dot:
- DEBUG_PRINT1("EXECUTING at_dot.\n");
- if (PTR_CHAR_POS((unsigned char *) d) + 1 != point)
- goto fail;
- break;
-#endif /* not emacs19 */
-
- case syntaxspec:
- DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt);
- mcnt = *p++;
- goto matchsyntax;
-
- case wordchar:
- DEBUG_PRINT1("EXECUTING Emacs wordchar.\n");
- mcnt = (int) Sword;
- matchsyntax:
- PREFETCH();
- if (SYNTAX(*d++) != (enum syntaxcode) mcnt)
- goto fail;
- SET_REGS_MATCHED();
- break;
-
- case notsyntaxspec:
- DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt);
- mcnt = *p++;
- goto matchnotsyntax;
-
- case notwordchar:
- DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n");
- mcnt = (int) Sword;
- matchnotsyntax:
- PREFETCH();
- if (SYNTAX(*d++) == (enum syntaxcode) mcnt)
- goto fail;
- SET_REGS_MATCHED();
- break;
-
-#else /* not emacs */
case wordchar:
DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
PREFETCH();
SET_REGS_MATCHED();
d++;
break;
-#endif /* not emacs */
default:
abort();
case wordend:
case wordbound:
case notwordbound:
-#ifdef emacs
- case before_dot:
- case at_dot:
- case after_dot:
-#endif
break;
case start_memory:
\f
/* Entry points for GNU code. */
-/* re_compile_pattern is the GNU regular expression compiler: it
- * compiles PATTERN (of length SIZE) and puts the result in BUFP.
- * Returns 0 if the pattern was valid, otherwise an error string.
- *
- * Assumes the `allocated' (and perhaps `buffer') and `translate' fields
- * are set in BUFP on entry.
- *
- * We call regex_compile to do the actual compilation. */
-
-const char *
-re_compile_pattern(pattern, length, bufp)
- const char *pattern;
- int length;
- struct re_pattern_buffer *bufp;
-{
- reg_errcode_t ret;
-
- /* GNU code is written to assume at least RE_NREGS registers will be set
- * (and at least one extra will be -1). */
- bufp->regs_allocated = REGS_UNALLOCATED;
-
- /* And GNU code determines whether or not to get register information
- * by passing null for the REGS argument to re_match, etc., not by
- * setting no_sub. */
- bufp->no_sub = 0;
-
- /* Match anchors at newline. */
- bufp->newline_anchor = 1;
-
- ret = regex_compile(pattern, length, re_syntax_options, bufp);
-
- return re_error_msg[(int) ret];
-}
-\f
-/* Entry points compatible with 4.2 BSD regex library. We don't define
- * them if this is an Emacs or POSIX compilation. */
-
-#if !defined (emacs) && !defined (_POSIX_SOURCE)
-
-/* BSD has one and only one pattern buffer. */
-static struct re_pattern_buffer re_comp_buf;
-
-char *
-re_comp(s)
- const char *s;
-{
- reg_errcode_t ret;
-
- if (!s) {
- if (!re_comp_buf.buffer)
- return "No previous regular expression";
- return 0;
- }
- if (!re_comp_buf.buffer) {
- re_comp_buf.buffer = (unsigned char *) malloc(200);
- if (re_comp_buf.buffer == NULL)
- return "Memory exhausted";
- re_comp_buf.allocated = 200;
-
- re_comp_buf.fastmap = (char *) malloc(1 << BYTEWIDTH);
- if (re_comp_buf.fastmap == NULL)
- return "Memory exhausted";
- }
- /* Since `re_exec' always passes NULL for the `regs' argument, we
- * don't need to initialize the pattern buffer fields which affect it. */
-
- /* Match anchors at newlines. */
- re_comp_buf.newline_anchor = 1;
-
- ret = regex_compile(s, strlen(s), re_syntax_options, &re_comp_buf);
-
- /* Yes, we're discarding `const' here. */
- return (char *) re_error_msg[(int) ret];
-}
-
-
-int
-re_exec(s)
- const char *s;
-{
- const int len = strlen(s);
- return
- 0 <= re_search(&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
-}
-
-#endif /* not emacs and not _POSIX_SOURCE */
\f
-/* POSIX.2 functions. Don't define these for Emacs. */
-
-#ifndef emacs
+/* POSIX.2 functions */
/* regcomp takes a regular expression as a string and compiles it.
*
free(preg->translate);
preg->translate = NULL;
}
-
-#endif /* not emacs */
\f
/*
* Local variables: