]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/regex.h
Add script to update copyright notices and reformat some to facilitate its use.
[thirdparty/glibc.git] / posix / regex.h
CommitLineData
2b83a2a4 1/* Definitions for data structures and routines for the regular
3b0bdc72 2 expression library.
f4cf5f2d 3 Copyright (C) 1985, 1989-2012 Free Software Foundation, Inc.
3b0bdc72 4 This file is part of the GNU C Library.
2b83a2a4 5
54d79e99 6 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
2b83a2a4 10
54d79e99
UD
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 14 Lesser General Public License for more details.
2b83a2a4 15
41bdb6e2 16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
2b83a2a4 19
5107cf1d
UD
20#ifndef _REGEX_H
21#define _REGEX_H 1
2b83a2a4 22
94c24227
UD
23#include <sys/types.h>
24
1fb05e3d
UD
25/* Allow the use in C++ code. */
26#ifdef __cplusplus
27extern "C" {
28#endif
29
4cca6b86
UD
30/* The following two types have to be signed and unsigned integer types
31 wide enough to hold a value of a pointer. For most ANSI compilers
32 ptrdiff_t and size_t should be likely OK. Still size of these two
33 types is 2 for Microsoft C. Ugh... */
34typedef long int s_reg_t;
35typedef unsigned long int active_reg_t;
36
2b83a2a4
RM
37/* The following bits are used to determine the regexp syntax we
38 recognize. The set/not-set meanings are chosen so that Emacs syntax
39 remains the value 0. The bits are given in alphabetical order, and
40 the definitions shifted by one from the previous bit; thus, when we
41 add or remove a bit, only one other definition need change. */
4cca6b86 42typedef unsigned long int reg_syntax_t;
2b83a2a4 43
a53d3f82 44#ifdef __USE_GNU
2b83a2a4
RM
45/* If this bit is not set, then \ inside a bracket expression is literal.
46 If set, then such a \ quotes the following character. */
a53d3f82 47# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
2b83a2a4
RM
48
49/* If this bit is not set, then + and ? are operators, and \+ and \? are
54d79e99 50 literals.
2b83a2a4 51 If set, then \+ and \? are operators and + and ? are literals. */
a53d3f82 52# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
2b83a2a4
RM
53
54/* If this bit is set, then character classes are supported. They are:
55 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
56 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
57 If not set, then character classes are not supported. */
a53d3f82 58# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
2b83a2a4
RM
59
60/* If this bit is set, then ^ and $ are always anchors (outside bracket
61 expressions, of course).
62 If this bit is not set, then it depends:
a4b89fd8
AR
63 ^ is an anchor if it is at the beginning of a regular
64 expression or after an open-group or an alternation operator;
65 $ is an anchor if it is at the end of a regular expression, or
66 before a close-group or an alternation operator.
2b83a2a4
RM
67
68 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
69 POSIX draft 11.2 says that * etc. in leading positions is undefined.
70 We already implemented a previous draft which made those constructs
71 invalid, though, so we haven't changed the code back. */
a53d3f82 72# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
2b83a2a4
RM
73
74/* If this bit is set, then special characters are always special
75 regardless of where they are in the pattern.
76 If this bit is not set, then special characters are special only in
54d79e99 77 some contexts; otherwise they are ordinary. Specifically,
2b83a2a4
RM
78 * + ? and intervals are only special when not after the beginning,
79 open-group, or alternation operator. */
a53d3f82 80# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
2b83a2a4
RM
81
82/* If this bit is set, then *, +, ?, and { cannot be first in an re or
83 immediately after an alternation or begin-group operator. */
a53d3f82 84# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
2b83a2a4
RM
85
86/* If this bit is set, then . matches newline.
87 If not set, then it doesn't. */
a53d3f82 88# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
2b83a2a4
RM
89
90/* If this bit is set, then . doesn't match NUL.
91 If not set, then it does. */
a53d3f82 92# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
2b83a2a4
RM
93
94/* If this bit is set, nonmatching lists [^...] do not match newline.
95 If not set, they do. */
a53d3f82 96# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
2b83a2a4
RM
97
98/* If this bit is set, either \{...\} or {...} defines an
54d79e99 99 interval, depending on RE_NO_BK_BRACES.
2b83a2a4 100 If not set, \{, \}, {, and } are literals. */
a53d3f82 101# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
2b83a2a4
RM
102
103/* If this bit is set, +, ? and | aren't recognized as operators.
104 If not set, they are. */
a53d3f82 105# define RE_LIMITED_OPS (RE_INTERVALS << 1)
2b83a2a4
RM
106
107/* If this bit is set, newline is an alternation operator.
108 If not set, newline is literal. */
a53d3f82 109# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
2b83a2a4
RM
110
111/* If this bit is set, then `{...}' defines an interval, and \{ and \}
112 are literals.
113 If not set, then `\{...\}' defines an interval. */
a53d3f82 114# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
2b83a2a4
RM
115
116/* If this bit is set, (...) defines a group, and \( and \) are literals.
117 If not set, \(...\) defines a group, and ( and ) are literals. */
a53d3f82 118# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
2b83a2a4
RM
119
120/* If this bit is set, then \<digit> matches <digit>.
121 If not set, then \<digit> is a back-reference. */
a53d3f82 122# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
2b83a2a4 123
54d79e99 124/* If this bit is set, then | is an alternation operator, and \| is literal.
2b83a2a4 125 If not set, then \| is an alternation operator, and | is literal. */
a53d3f82 126# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
2b83a2a4
RM
127
128/* If this bit is set, then an ending range point collating lower
129 than the starting range point, as in [z-a], is invalid.
130 If not set, then when the ending range point collates lower than the
131 starting range point, the range is ignored. */
a53d3f82 132# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
2b83a2a4
RM
133
134/* If this bit is set, then an unmatched ) is ordinary.
135 If not set, then an unmatched ) is invalid. */
a53d3f82 136# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
2b83a2a4
RM
137
138/* If this bit is set, succeed as soon as we match the whole pattern,
139 without further backtracking. */
a53d3f82 140# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
2b83a2a4 141
4cca6b86
UD
142/* If this bit is set, do not process the GNU regex operators.
143 If not set, then the GNU regex operators are recognized. */
a53d3f82 144# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
4cca6b86 145
51702635
UD
146/* If this bit is set, turn on internal regex debugging.
147 If not set, and debugging was on, turn it off.
148 This only works if regex.c is compiled -DDEBUG.
149 We define this bit always, so that all that's needed to turn on
150 debugging is to recompile regex.c; the calling code can always have
151 this bit set, and it won't affect anything in the normal case. */
a53d3f82 152# define RE_DEBUG (RE_NO_GNU_OPS << 1)
51702635 153
0a45b76c
UD
154/* If this bit is set, a syntactically invalid interval is treated as
155 a string of ordinary characters. For example, the ERE 'a{1' is
156 treated as 'a\{1'. */
a53d3f82 157# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
0a45b76c 158
3b0bdc72
UD
159/* If this bit is set, then ignore case when matching.
160 If not set, then case is significant. */
a53d3f82 161# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
3b0bdc72 162
134abcb5
UD
163/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
164 for ^, because it is difficult to scan the regex backwards to find
165 whether ^ should be special. */
a53d3f82 166# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
134abcb5 167
06e8303a
UD
168/* If this bit is set, then \{ cannot be first in a BRE or
169 immediately after an alternation or begin-group operator. */
a53d3f82 170# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
06e8303a 171
c06a6956
UD
172/* If this bit is set, then no_sub will be set to 1 during
173 re_compile_pattern. */
a53d3f82
UD
174# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
175#endif
c06a6956 176
2b83a2a4
RM
177/* This global variable defines the particular regexp syntax to use (for
178 some interfaces). When a regexp is compiled, the syntax used is
179 stored in the pattern buffer, so changing this does not affect
180 already-compiled regexps. */
181extern reg_syntax_t re_syntax_options;
182\f
a53d3f82 183#ifdef __USE_GNU
2b83a2a4
RM
184/* Define combinations of the above bits for the standard possibilities.
185 (The [[[ comments delimit what gets put into the Texinfo file, so
54d79e99 186 don't delete them!) */
2b83a2a4
RM
187/* [[[begin syntaxes]]] */
188#define RE_SYNTAX_EMACS 0
189
190#define RE_SYNTAX_AWK \
4cca6b86
UD
191 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
192 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
193 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
51702635 194 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
a4b89fd8 195 | RE_CHAR_CLASSES \
4cca6b86
UD
196 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
197
198#define RE_SYNTAX_GNU_AWK \
a4b89fd8
AR
199 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
200 | RE_INVALID_INTERVAL_ORD) \
201 & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \
202 | RE_CONTEXT_INVALID_OPS ))
2b83a2a4 203
92d892a8 204#define RE_SYNTAX_POSIX_AWK \
51702635 205 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
a4b89fd8
AR
206 | RE_INTERVALS | RE_NO_GNU_OPS \
207 | RE_INVALID_INTERVAL_ORD)
2b83a2a4
RM
208
209#define RE_SYNTAX_GREP \
210 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
211 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
212 | RE_NEWLINE_ALT)
213
214#define RE_SYNTAX_EGREP \
215 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
216 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
217 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
218 | RE_NO_BK_VBAR)
219
220#define RE_SYNTAX_POSIX_EGREP \
0a45b76c
UD
221 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
222 | RE_INVALID_INTERVAL_ORD)
2b83a2a4
RM
223
224/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
225#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
226
227#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
228
229/* Syntax bits common to both basic and extended POSIX regex syntax. */
230#define _RE_SYNTAX_POSIX_COMMON \
231 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
232 | RE_INTERVALS | RE_NO_EMPTY_RANGES)
233
234#define RE_SYNTAX_POSIX_BASIC \
06e8303a 235 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
2b83a2a4
RM
236
237/* Differs from ..._POSIX_BASIC in that RE_BK_PLUS_QM becomes
238 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized, and in that
239 RE_CONTEXT_INVALID_DUP is not set. Actually, this isn't minimal, since other operators, such as \`, aren't disabled. */
240#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
241 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
242
243#define RE_SYNTAX_POSIX_EXTENDED \
9281f45d
UD
244 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
245 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
246 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
247 | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
2b83a2a4 248
0e179ea6
UD
249/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
250 removed and RE_NO_BK_REFS is added. */
2b83a2a4
RM
251#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
252 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
253 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
254 | RE_NO_BK_PARENS | RE_NO_BK_REFS \
255 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
256/* [[[end syntaxes]]] */
257\f
258/* Maximum number of duplicates an interval can allow. Some systems
259 (erroneously) define this in other header files, but we want our
260 value, so remove any previous define. */
a53d3f82
UD
261# ifdef RE_DUP_MAX
262# undef RE_DUP_MAX
263# endif
4cca6b86 264/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
a53d3f82
UD
265# define RE_DUP_MAX (0x7fff)
266#endif
2b83a2a4
RM
267
268
269/* POSIX `cflags' bits (i.e., information for `regcomp'). */
270
271/* If this bit is set, then use extended regular expression syntax.
272 If not set, then use basic regular expression syntax. */
273#define REG_EXTENDED 1
274
275/* If this bit is set, then ignore case when matching.
276 If not set, then case is significant. */
277#define REG_ICASE (REG_EXTENDED << 1)
54d79e99 278
2b83a2a4
RM
279/* If this bit is set, then anchors do match at newline
280 characters in the string.
281 If not set, then anchors do not match at newlines. */
282#define REG_NEWLINE (REG_ICASE << 1)
283
284/* If this bit is set, then report only success or fail in regexec.
285 If not set, then returns differ between not matching and errors. */
286#define REG_NOSUB (REG_NEWLINE << 1)
287
288
289/* POSIX `eflags' bits (i.e., information for regexec). */
290
291/* If this bit is set, then the beginning-of-line operator doesn't match
292 the beginning of the string (presumably because it's not the
293 beginning of a line).
294 If not set, then the beginning-of-line operator does match the
295 beginning of the string. */
296#define REG_NOTBOL 1
297
298/* Like REG_NOTBOL, except for the end-of-line. */
299#define REG_NOTEOL (1 << 1)
300
6fefb4e0
UD
301/* Use PMATCH[0] to delimit the start and end of the search in the
302 buffer. */
303#define REG_STARTEND (1 << 2)
304
2b83a2a4
RM
305
306/* If any error codes are removed, changed, or added, update the
307 `re_error_msg' table in regex.c. */
308typedef enum
309{
6701afc7 310#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K
4bae5567 311 REG_ENOSYS = -1, /* This will never happen for this implementation. */
9756dfe1
UD
312#endif
313
2b83a2a4
RM
314 REG_NOERROR = 0, /* Success. */
315 REG_NOMATCH, /* Didn't find a match (for regexec). */
316
317 /* POSIX regcomp return error codes. (In the order listed in the
318 standard.) */
319 REG_BADPAT, /* Invalid pattern. */
df759c2a 320 REG_ECOLLATE, /* Invalid collating element. */
2b83a2a4
RM
321 REG_ECTYPE, /* Invalid character class name. */
322 REG_EESCAPE, /* Trailing backslash. */
323 REG_ESUBREG, /* Invalid back reference. */
324 REG_EBRACK, /* Unmatched left bracket. */
54d79e99 325 REG_EPAREN, /* Parenthesis imbalance. */
2b83a2a4
RM
326 REG_EBRACE, /* Unmatched \{. */
327 REG_BADBR, /* Invalid contents of \{\}. */
328 REG_ERANGE, /* Invalid range end. */
329 REG_ESPACE, /* Ran out of memory. */
330 REG_BADRPT, /* No preceding re for repetition op. */
331
332 /* Error codes we've added. */
333 REG_EEND, /* Premature end. */
334 REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
335 REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
336} reg_errcode_t;
337\f
338/* This data structure represents a compiled pattern. Before calling
339 the pattern compiler, the fields `buffer', `allocated', `fastmap',
78e64fdc
RT
340 and `translate' can be set. After the pattern has been compiled,
341 the fields `re_nsub', `not_bol' and `not_eol' are available. All
342 other fields are private to the regex routines. */
2b83a2a4 343
54d79e99 344#ifndef RE_TRANSLATE_TYPE
a53d3f82
UD
345# define __RE_TRANSLATE_TYPE unsigned char *
346# ifdef __USE_GNU
347# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE
348# endif
349#endif
350
351#ifdef __USE_GNU
352# define __REPB_PREFIX(name) name
353#else
354# define __REPB_PREFIX(name) __##name
03a75825
RM
355#endif
356
2b83a2a4
RM
357struct re_pattern_buffer
358{
2ff89ea4
UD
359 /* Space that holds the compiled pattern. It is declared as
360 `unsigned char *' because its elements are sometimes used as
361 array indexes. */
a53d3f82 362 unsigned char *__REPB_PREFIX(buffer);
2b83a2a4 363
2ff89ea4 364 /* Number of bytes to which `buffer' points. */
a53d3f82 365 unsigned long int __REPB_PREFIX(allocated);
2b83a2a4 366
2ff89ea4 367 /* Number of bytes actually used in `buffer'. */
a53d3f82 368 unsigned long int __REPB_PREFIX(used);
2b83a2a4 369
2ff89ea4 370 /* Syntax setting with which the pattern was compiled. */
a53d3f82 371 reg_syntax_t __REPB_PREFIX(syntax);
2b83a2a4 372
2ff89ea4
UD
373 /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
374 fastmap, if there is one, to skip over impossible starting points
375 for matches. */
a53d3f82 376 char *__REPB_PREFIX(fastmap);
2b83a2a4 377
2ff89ea4
UD
378 /* Either a translate table to apply to all characters before
379 comparing them, or zero for no translation. The translation is
380 applied to a pattern when it is compiled and to a string when it
381 is matched. */
a53d3f82 382 __RE_TRANSLATE_TYPE __REPB_PREFIX(translate);
2b83a2a4 383
2ff89ea4 384 /* Number of subexpressions found by the compiler. */
2b83a2a4
RM
385 size_t re_nsub;
386
2ff89ea4
UD
387 /* Zero if this pattern cannot match the empty string, one else.
388 Well, in truth it's used only in `re_search_2', to see whether or
389 not we should use the fastmap, so we don't set this absolutely
390 perfectly; see `re_compile_fastmap' (the `duplicate' case). */
a53d3f82 391 unsigned __REPB_PREFIX(can_be_null) : 1;
2b83a2a4 392
2ff89ea4
UD
393 /* If REGS_UNALLOCATED, allocate space in the `regs' structure
394 for `max (RE_NREGS, re_nsub + 1)' groups.
395 If REGS_REALLOCATE, reallocate space if necessary.
396 If REGS_FIXED, use what's there. */
a53d3f82
UD
397#ifdef __USE_GNU
398# define REGS_UNALLOCATED 0
399# define REGS_REALLOCATE 1
400# define REGS_FIXED 2
401#endif
402 unsigned __REPB_PREFIX(regs_allocated) : 2;
2b83a2a4 403
2ff89ea4
UD
404 /* Set to zero when `regex_compile' compiles a pattern; set to one
405 by `re_compile_fastmap' if it updates the fastmap. */
a53d3f82 406 unsigned __REPB_PREFIX(fastmap_accurate) : 1;
2b83a2a4 407
2ff89ea4
UD
408 /* If set, `re_match_2' does not return information about
409 subexpressions. */
a53d3f82 410 unsigned __REPB_PREFIX(no_sub) : 1;
2b83a2a4 411
2ff89ea4
UD
412 /* If set, a beginning-of-line anchor doesn't match at the beginning
413 of the string. */
a53d3f82 414 unsigned __REPB_PREFIX(not_bol) : 1;
2b83a2a4 415
2ff89ea4 416 /* Similarly for an end-of-line anchor. */
a53d3f82 417 unsigned __REPB_PREFIX(not_eol) : 1;
2b83a2a4 418
2ff89ea4 419 /* If true, an anchor at a newline matches. */
a53d3f82 420 unsigned __REPB_PREFIX(newline_anchor) : 1;
2b83a2a4
RM
421};
422
423typedef struct re_pattern_buffer regex_t;
424\f
425/* Type for byte offsets within the string. POSIX mandates this. */
426typedef int regoff_t;
427
428
a53d3f82 429#ifdef __USE_GNU
2b83a2a4
RM
430/* This is the structure we store register match data in. See
431 regex.texinfo for a full description of what registers match. */
432struct re_registers
433{
434 unsigned num_regs;
435 regoff_t *start;
436 regoff_t *end;
437};
438
439
440/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
441 `re_match_2' returns information about at least this many registers
442 the first time a `regs' structure is passed. */
a53d3f82
UD
443# ifndef RE_NREGS
444# define RE_NREGS 30
445# endif
2b83a2a4
RM
446#endif
447
448
449/* POSIX specification for registers. Aside from using different names than
450 `re_registers', POSIX uses an array of structures, instead of a
451 structure of arrays. */
452typedef struct
453{
454 regoff_t rm_so; /* Byte offset from string's start to substring's start. */
455 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
456} regmatch_t;
457\f
458/* Declarations for routines. */
459
a53d3f82 460#ifdef __USE_GNU
2b83a2a4
RM
461/* Sets the current default syntax to SYNTAX, and return the old syntax.
462 You can also simply assign to the `re_syntax_options' variable. */
2ff89ea4 463extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
2b83a2a4
RM
464
465/* Compile the regular expression PATTERN, with length LENGTH
466 and syntax given by the global `re_syntax_options', into the buffer
78e64fdc
RT
467 BUFFER. Return NULL if successful, and an error string if not.
468
469 To free the allocated storage, you must call `regfree' on BUFFER.
470 Note that the translate table must either have been initialised by
471 `regcomp', with a malloc'ed value, or set to NULL before calling
472 `regfree'. */
2ff89ea4
UD
473extern const char *re_compile_pattern (const char *__pattern, size_t __length,
474 struct re_pattern_buffer *__buffer);
2b83a2a4
RM
475
476
477/* Compile a fastmap for the compiled pattern in BUFFER; used to
478 accelerate searches. Return 0 if successful and -2 if there was an
479 internal error. */
2ff89ea4 480extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
2b83a2a4
RM
481
482
483/* Search in the string STRING (with length LENGTH) for the pattern
484 compiled into BUFFER. Start searching at position START, for RANGE
485 characters. Return the starting position of the match, -1 for no
486 match, or -2 for an internal error. Also return register
487 information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
2ff89ea4
UD
488extern int re_search (struct re_pattern_buffer *__buffer, const char *__string,
489 int __length, int __start, int __range,
490 struct re_registers *__regs);
2b83a2a4
RM
491
492
493/* Like `re_search', but search in the concatenation of STRING1 and
494 STRING2. Also, stop searching at index START + STOP. */
2ff89ea4
UD
495extern int re_search_2 (struct re_pattern_buffer *__buffer,
496 const char *__string1, int __length1,
497 const char *__string2, int __length2, int __start,
498 int __range, struct re_registers *__regs, int __stop);
2b83a2a4
RM
499
500
501/* Like `re_search', but return how many characters in STRING the regexp
502 in BUFFER matched, starting at position START. */
2ff89ea4
UD
503extern int re_match (struct re_pattern_buffer *__buffer, const char *__string,
504 int __length, int __start, struct re_registers *__regs);
2b83a2a4
RM
505
506
507/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
2ff89ea4
UD
508extern int re_match_2 (struct re_pattern_buffer *__buffer,
509 const char *__string1, int __length1,
510 const char *__string2, int __length2, int __start,
511 struct re_registers *__regs, int __stop);
2b83a2a4
RM
512
513
514/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
515 ENDS. Subsequent matches using BUFFER and REGS will use this memory
516 for recording register information. STARTS and ENDS must be
517 allocated with malloc, and must each be at least `NUM_REGS * sizeof
518 (regoff_t)' bytes long.
519
520 If NUM_REGS == 0, then subsequent matches should allocate their own
521 register data.
522
523 Unless this function is called, the first search or match using
524 BUFFER will allocate its own register data, without
525 freeing the old data. */
2ff89ea4
UD
526extern void re_set_registers (struct re_pattern_buffer *__buffer,
527 struct re_registers *__regs,
528 unsigned int __num_regs,
529 regoff_t *__starts, regoff_t *__ends);
a53d3f82 530#endif /* Use GNU */
2b83a2a4 531
a53d3f82 532#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD)
6973fc01 533# ifndef _CRAY
2b83a2a4 534/* 4.2 bsd compatibility. */
2d87db5b
UD
535extern char *re_comp (const char *);
536extern int re_exec (const char *);
6973fc01 537# endif
4cca6b86 538#endif
2b83a2a4 539
7ca404ad
UD
540/* GCC 2.95 and later have "__restrict"; C99 compilers have
541 "restrict", and "configure" may have defined "restrict". */
542#ifndef __restrict
543# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
544# if defined restrict || 199901L <= __STDC_VERSION__
545# define __restrict restrict
546# else
547# define __restrict
548# endif
549# endif
da2a3ca6 550#endif
8343eaee
UD
551/* gcc 3.1 and up support the [restrict] syntax. */
552#ifndef __restrict_arr
5631e740
UD
553# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
554 && !defined __GNUG__
8343eaee
UD
555# define __restrict_arr __restrict
556# else
557# define __restrict_arr
558# endif
559#endif
7ca404ad 560
2b83a2a4 561/* POSIX compatibility. */
2d87db5b
UD
562extern int regcomp (regex_t *__restrict __preg,
563 const char *__restrict __pattern,
564 int __cflags);
30baa360 565
2d87db5b
UD
566extern int regexec (const regex_t *__restrict __preg,
567 const char *__restrict __string, size_t __nmatch,
568 regmatch_t __pmatch[__restrict_arr],
569 int __eflags);
30baa360 570
2d87db5b
UD
571extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
572 char *__restrict __errbuf, size_t __errbuf_size);
30baa360 573
2d87db5b 574extern void regfree (regex_t *__preg);
2b83a2a4 575
1fb05e3d
UD
576
577#ifdef __cplusplus
578}
579#endif /* C++ */
580
5107cf1d 581#endif /* regex.h */