1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 static reg_errcode_t
re_compile_internal (regex_t
*preg
, const char * pattern
,
22 int length
, reg_syntax_t syntax
);
23 static void re_compile_fastmap_iter (regex_t
*bufp
,
24 const re_dfastate_t
*init_state
,
26 static reg_errcode_t
init_dfa (re_dfa_t
*dfa
, int pat_len
);
27 static reg_errcode_t
init_word_char (re_dfa_t
*dfa
);
29 static void free_charset (re_charset_t
*cset
);
30 #endif /* RE_ENABLE_I18N */
31 static void free_workarea_compile (regex_t
*preg
);
32 static reg_errcode_t
create_initial_state (re_dfa_t
*dfa
);
34 static void optimize_utf8 (re_dfa_t
*dfa
);
36 static reg_errcode_t
analyze (re_dfa_t
*dfa
);
37 static reg_errcode_t
analyze_tree (re_dfa_t
*dfa
, bin_tree_t
*node
);
38 static void calc_first (re_dfa_t
*dfa
, bin_tree_t
*node
);
39 static void calc_next (re_dfa_t
*dfa
, bin_tree_t
*node
);
40 static void calc_epsdest (re_dfa_t
*dfa
, bin_tree_t
*node
);
41 static reg_errcode_t
duplicate_node_closure (re_dfa_t
*dfa
, int top_org_node
,
42 int top_clone_node
, int root_node
,
43 unsigned int constraint
);
44 static reg_errcode_t
duplicate_node (int *new_idx
, re_dfa_t
*dfa
, int org_idx
,
45 unsigned int constraint
);
46 static int search_duplicated_node (re_dfa_t
*dfa
, int org_node
,
47 unsigned int constraint
);
48 static reg_errcode_t
calc_eclosure (re_dfa_t
*dfa
);
49 static reg_errcode_t
calc_eclosure_iter (re_node_set
*new_set
, re_dfa_t
*dfa
,
51 static void calc_inveclosure (re_dfa_t
*dfa
);
52 static int fetch_number (re_string_t
*input
, re_token_t
*token
,
54 static re_token_t
fetch_token (re_string_t
*input
, reg_syntax_t syntax
);
55 static int peek_token (re_token_t
*token
, re_string_t
*input
,
57 static int peek_token_bracket (re_token_t
*token
, re_string_t
*input
,
59 static bin_tree_t
*parse (re_string_t
*regexp
, regex_t
*preg
,
60 reg_syntax_t syntax
, reg_errcode_t
*err
);
61 static bin_tree_t
*parse_reg_exp (re_string_t
*regexp
, regex_t
*preg
,
62 re_token_t
*token
, reg_syntax_t syntax
,
63 int nest
, reg_errcode_t
*err
);
64 static bin_tree_t
*parse_branch (re_string_t
*regexp
, regex_t
*preg
,
65 re_token_t
*token
, reg_syntax_t syntax
,
66 int nest
, reg_errcode_t
*err
);
67 static bin_tree_t
*parse_expression (re_string_t
*regexp
, regex_t
*preg
,
68 re_token_t
*token
, reg_syntax_t syntax
,
69 int nest
, reg_errcode_t
*err
);
70 static bin_tree_t
*parse_sub_exp (re_string_t
*regexp
, regex_t
*preg
,
71 re_token_t
*token
, reg_syntax_t syntax
,
72 int nest
, reg_errcode_t
*err
);
73 static bin_tree_t
*parse_dup_op (bin_tree_t
*dup_elem
, re_string_t
*regexp
,
74 re_dfa_t
*dfa
, re_token_t
*token
,
75 reg_syntax_t syntax
, reg_errcode_t
*err
);
76 static bin_tree_t
*parse_bracket_exp (re_string_t
*regexp
, re_dfa_t
*dfa
,
77 re_token_t
*token
, reg_syntax_t syntax
,
79 static reg_errcode_t
parse_bracket_element (bracket_elem_t
*elem
,
81 re_token_t
*token
, int token_len
,
85 static reg_errcode_t
parse_bracket_symbol (bracket_elem_t
*elem
,
89 # ifdef RE_ENABLE_I18N
90 static reg_errcode_t
build_range_exp (re_bitset_ptr_t sbcset
,
91 re_charset_t
*mbcset
, int *range_alloc
,
92 bracket_elem_t
*start_elem
,
93 bracket_elem_t
*end_elem
);
94 static reg_errcode_t
build_collating_symbol (re_bitset_ptr_t sbcset
,
97 const unsigned char *name
);
98 # else /* not RE_ENABLE_I18N */
99 static reg_errcode_t
build_range_exp (re_bitset_ptr_t sbcset
,
100 bracket_elem_t
*start_elem
,
101 bracket_elem_t
*end_elem
);
102 static reg_errcode_t
build_collating_symbol (re_bitset_ptr_t sbcset
,
103 const unsigned char *name
);
104 # endif /* not RE_ENABLE_I18N */
105 #endif /* not _LIBC */
106 #ifdef RE_ENABLE_I18N
107 static reg_errcode_t
build_equiv_class (re_bitset_ptr_t sbcset
,
108 re_charset_t
*mbcset
,
109 int *equiv_class_alloc
,
110 const unsigned char *name
);
111 static reg_errcode_t
build_charclass (RE_TRANSLATE_TYPE trans
,
112 re_bitset_ptr_t sbcset
,
113 re_charset_t
*mbcset
,
114 int *char_class_alloc
,
115 const unsigned char *class_name
,
116 reg_syntax_t syntax
);
117 #else /* not RE_ENABLE_I18N */
118 static reg_errcode_t
build_equiv_class (re_bitset_ptr_t sbcset
,
119 const unsigned char *name
);
120 static reg_errcode_t
build_charclass (RE_TRANSLATE_TYPE trans
,
121 re_bitset_ptr_t sbcset
,
122 const unsigned char *class_name
,
123 reg_syntax_t syntax
);
124 #endif /* not RE_ENABLE_I18N */
125 static bin_tree_t
*build_charclass_op (re_dfa_t
*dfa
, RE_TRANSLATE_TYPE trans
,
126 const unsigned char *class_name
,
127 const unsigned char *extra
, int not,
129 static void free_bin_tree (bin_tree_t
*tree
);
130 static bin_tree_t
*create_tree (bin_tree_t
*left
, bin_tree_t
*right
,
131 re_token_type_t type
, int index
);
132 static bin_tree_t
*duplicate_tree (const bin_tree_t
*src
, re_dfa_t
*dfa
);
134 /* This table gives an error message for each of the error codes listed
135 in regex.h. Obviously the order here has to be same as there.
136 POSIX doesn't require that we do anything for REG_NOERROR,
137 but why not be nice? */
139 const char __re_error_msgid
[] attribute_hidden
=
141 #define REG_NOERROR_IDX 0
142 gettext_noop ("Success") /* REG_NOERROR */
144 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
145 gettext_noop ("No match") /* REG_NOMATCH */
147 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
148 gettext_noop ("Invalid regular expression") /* REG_BADPAT */
150 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
151 gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
153 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
154 gettext_noop ("Invalid character class name") /* REG_ECTYPE */
156 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
157 gettext_noop ("Trailing backslash") /* REG_EESCAPE */
159 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
160 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
162 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
163 gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
165 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
166 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
168 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
169 gettext_noop ("Unmatched \\{") /* REG_EBRACE */
171 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
172 gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
174 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
175 gettext_noop ("Invalid range end") /* REG_ERANGE */
177 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
178 gettext_noop ("Memory exhausted") /* REG_ESPACE */
180 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
181 gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
183 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
184 gettext_noop ("Premature end of regular expression") /* REG_EEND */
186 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
187 gettext_noop ("Regular expression too big") /* REG_ESIZE */
189 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
190 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
193 const size_t __re_error_msgid_idx
[] attribute_hidden
=
214 /* Entry points for GNU code. */
216 /* re_compile_pattern is the GNU regular expression compiler: it
217 compiles PATTERN (of length LENGTH) and puts the result in BUFP.
218 Returns 0 if the pattern was valid, otherwise an error string.
220 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
221 are set in BUFP on entry. */
224 re_compile_pattern (pattern
, length
, bufp
)
227 struct re_pattern_buffer
*bufp
;
231 /* And GNU code determines whether or not to get register information
232 by passing null for the REGS argument to re_match, etc., not by
236 /* Match anchors at newline. */
237 bufp
->newline_anchor
= 1;
239 ret
= re_compile_internal (bufp
, pattern
, length
, re_syntax_options
);
243 return gettext (__re_error_msgid
+ __re_error_msgid_idx
[(int) ret
]);
246 weak_alias (__re_compile_pattern
, re_compile_pattern
)
249 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
250 also be assigned to arbitrarily: each pattern buffer stores its own
251 syntax, so it can be changed between regex compilations. */
252 /* This has no initializer because initialized variables in Emacs
253 become read-only after dumping. */
254 reg_syntax_t re_syntax_options
;
257 /* Specify the precise syntax of regexps for compilation. This provides
258 for compatibility for various utilities which historically have
259 different, incompatible syntaxes.
261 The argument SYNTAX is a bit mask comprised of the various bits
262 defined in regex.h. We return the old syntax. */
265 re_set_syntax (syntax
)
268 reg_syntax_t ret
= re_syntax_options
;
270 re_syntax_options
= syntax
;
274 weak_alias (__re_set_syntax
, re_set_syntax
)
278 re_compile_fastmap (bufp
)
279 struct re_pattern_buffer
*bufp
;
281 re_dfa_t
*dfa
= (re_dfa_t
*) bufp
->buffer
;
282 char *fastmap
= bufp
->fastmap
;
284 memset (fastmap
, '\0', sizeof (char) * SBC_MAX
);
285 re_compile_fastmap_iter (bufp
, dfa
->init_state
, fastmap
);
286 if (dfa
->init_state
!= dfa
->init_state_word
)
287 re_compile_fastmap_iter (bufp
, dfa
->init_state_word
, fastmap
);
288 if (dfa
->init_state
!= dfa
->init_state_nl
)
289 re_compile_fastmap_iter (bufp
, dfa
->init_state_nl
, fastmap
);
290 if (dfa
->init_state
!= dfa
->init_state_begbuf
)
291 re_compile_fastmap_iter (bufp
, dfa
->init_state_begbuf
, fastmap
);
292 bufp
->fastmap_accurate
= 1;
296 weak_alias (__re_compile_fastmap
, re_compile_fastmap
)
300 __attribute ((always_inline
))
301 re_set_fastmap (char *fastmap
, int icase
, int ch
)
305 fastmap
[tolower (ch
)] = 1;
308 /* Helper function for re_compile_fastmap.
309 Compile fastmap for the initial_state INIT_STATE. */
312 re_compile_fastmap_iter (bufp
, init_state
, fastmap
)
314 const re_dfastate_t
*init_state
;
317 re_dfa_t
*dfa
= (re_dfa_t
*) bufp
->buffer
;
319 int icase
= (dfa
->mb_cur_max
== 1 && (bufp
->syntax
& RE_ICASE
));
320 for (node_cnt
= 0; node_cnt
< init_state
->nodes
.nelem
; ++node_cnt
)
322 int node
= init_state
->nodes
.elems
[node_cnt
];
323 re_token_type_t type
= dfa
->nodes
[node
].type
;
325 if (type
== CHARACTER
)
327 re_set_fastmap (fastmap
, icase
, dfa
->nodes
[node
].opr
.c
);
328 #ifdef RE_ENABLE_I18N
329 if ((bufp
->syntax
& RE_ICASE
) && dfa
->mb_cur_max
> 1)
331 unsigned char *buf
= alloca (dfa
->mb_cur_max
), *p
;
336 *p
++ = dfa
->nodes
[node
].opr
.c
;
337 while (++node
< dfa
->nodes_len
338 && dfa
->nodes
[node
].type
== CHARACTER
339 && dfa
->nodes
[node
].mb_partial
)
340 *p
++ = dfa
->nodes
[node
].opr
.c
;
341 memset (&state
, 0, sizeof (state
));
342 if (mbrtowc (&wc
, (const char *) buf
, p
- buf
,
344 && __wcrtomb ((char *) buf
, towlower (wc
), &state
) > 0)
345 re_set_fastmap (fastmap
, 0, buf
[0]);
349 else if (type
== SIMPLE_BRACKET
)
352 for (i
= 0, ch
= 0; i
< BITSET_UINTS
; ++i
)
353 for (j
= 0; j
< UINT_BITS
; ++j
, ++ch
)
354 if (dfa
->nodes
[node
].opr
.sbcset
[i
] & (1 << j
))
355 re_set_fastmap (fastmap
, icase
, ch
);
357 #ifdef RE_ENABLE_I18N
358 else if (type
== COMPLEX_BRACKET
)
361 re_charset_t
*cset
= dfa
->nodes
[node
].opr
.mbcset
;
362 if (cset
->non_match
|| cset
->ncoll_syms
|| cset
->nequiv_classes
363 || cset
->nranges
|| cset
->nchar_classes
)
366 if (_NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
) != 0)
368 /* In this case we want to catch the bytes which are
369 the first byte of any collation elements.
370 e.g. In da_DK, we want to catch 'a' since "aa"
371 is a valid collation element, and don't catch
372 'b' since 'b' is the only collation element
373 which starts from 'b'. */
375 const int32_t *table
= (const int32_t *)
376 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEMB
);
377 for (i
= 0, ch
= 0; i
< BITSET_UINTS
; ++i
)
378 for (j
= 0; j
< UINT_BITS
; ++j
, ++ch
)
380 re_set_fastmap (fastmap
, icase
, ch
);
383 if (dfa
->mb_cur_max
> 1)
384 for (i
= 0; i
< SBC_MAX
; ++i
)
385 if (__btowc (i
) == WEOF
)
386 re_set_fastmap (fastmap
, icase
, i
);
387 # endif /* not _LIBC */
389 for (i
= 0; i
< cset
->nmbchars
; ++i
)
393 memset (&state
, '\0', sizeof (state
));
394 __wcrtomb (buf
, cset
->mbchars
[i
], &state
);
395 re_set_fastmap (fastmap
, icase
, *(unsigned char *) buf
);
396 if ((bufp
->syntax
& RE_ICASE
) && dfa
->mb_cur_max
> 1)
398 __wcrtomb (buf
, towlower (cset
->mbchars
[i
]), &state
);
399 re_set_fastmap (fastmap
, 0, *(unsigned char *) buf
);
403 #endif /* RE_ENABLE_I18N */
404 else if (type
== END_OF_RE
|| type
== OP_PERIOD
)
406 memset (fastmap
, '\1', sizeof (char) * SBC_MAX
);
407 if (type
== END_OF_RE
)
408 bufp
->can_be_null
= 1;
414 /* Entry point for POSIX code. */
415 /* regcomp takes a regular expression as a string and compiles it.
417 PREG is a regex_t *. We do not expect any fields to be initialized,
418 since POSIX says we shouldn't. Thus, we set
420 `buffer' to the compiled pattern;
421 `used' to the length of the compiled pattern;
422 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
423 REG_EXTENDED bit in CFLAGS is set; otherwise, to
424 RE_SYNTAX_POSIX_BASIC;
425 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
426 `fastmap' to an allocated space for the fastmap;
427 `fastmap_accurate' to zero;
428 `re_nsub' to the number of subexpressions in PATTERN.
430 PATTERN is the address of the pattern string.
432 CFLAGS is a series of bits which affect compilation.
434 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
435 use POSIX basic syntax.
437 If REG_NEWLINE is set, then . and [^...] don't match newline.
438 Also, regexec will try a match beginning after every newline.
440 If REG_ICASE is set, then we considers upper- and lowercase
441 versions of letters to be equivalent when matching.
443 If REG_NOSUB is set, then when PREG is passed to regexec, that
444 routine will report only success or failure, and nothing about the
447 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
448 the return codes and their meanings.) */
451 regcomp (preg
, pattern
, cflags
)
452 regex_t
*__restrict preg
;
453 const char *__restrict pattern
;
457 reg_syntax_t syntax
= ((cflags
& REG_EXTENDED
) ? RE_SYNTAX_POSIX_EXTENDED
458 : RE_SYNTAX_POSIX_BASIC
);
464 /* Try to allocate space for the fastmap. */
465 preg
->fastmap
= re_malloc (char, SBC_MAX
);
466 if (BE (preg
->fastmap
== NULL
, 0))
469 syntax
|= (cflags
& REG_ICASE
) ? RE_ICASE
: 0;
471 /* If REG_NEWLINE is set, newlines are treated differently. */
472 if (cflags
& REG_NEWLINE
)
473 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
474 syntax
&= ~RE_DOT_NEWLINE
;
475 syntax
|= RE_HAT_LISTS_NOT_NEWLINE
;
476 /* It also changes the matching behavior. */
477 preg
->newline_anchor
= 1;
480 preg
->newline_anchor
= 0;
481 preg
->no_sub
= !!(cflags
& REG_NOSUB
);
482 preg
->translate
= NULL
;
484 ret
= re_compile_internal (preg
, pattern
, strlen (pattern
), syntax
);
486 /* POSIX doesn't distinguish between an unmatched open-group and an
487 unmatched close-group: both are REG_EPAREN. */
488 if (ret
== REG_ERPAREN
)
491 /* We have already checked preg->fastmap != NULL. */
492 if (BE (ret
== REG_NOERROR
, 1))
493 /* Compute the fastmap now, since regexec cannot modify the pattern
494 buffer. This function nevers fails in this implementation. */
495 (void) re_compile_fastmap (preg
);
498 /* Some error occurred while compiling the expression. */
499 re_free (preg
->fastmap
);
500 preg
->fastmap
= NULL
;
506 weak_alias (__regcomp
, regcomp
)
509 /* Returns a message corresponding to an error code, ERRCODE, returned
510 from either regcomp or regexec. We don't use PREG here. */
513 regerror (errcode
, preg
, errbuf
, errbuf_size
)
523 || errcode
>= (int) (sizeof (__re_error_msgid_idx
)
524 / sizeof (__re_error_msgid_idx
[0])), 0))
525 /* Only error codes returned by the rest of the code should be passed
526 to this routine. If we are given anything else, or if other regex
527 code generates an invalid error code, then the program has a bug.
528 Dump core so we can fix it. */
531 msg
= gettext (__re_error_msgid
+ __re_error_msgid_idx
[errcode
]);
533 msg_size
= strlen (msg
) + 1; /* Includes the null. */
535 if (BE (errbuf_size
!= 0, 1))
537 if (BE (msg_size
> errbuf_size
, 0))
539 #if defined HAVE_MEMPCPY || defined _LIBC
540 *((char *) __mempcpy (errbuf
, msg
, errbuf_size
- 1)) = '\0';
542 memcpy (errbuf
, msg
, errbuf_size
- 1);
543 errbuf
[errbuf_size
- 1] = 0;
547 memcpy (errbuf
, msg
, msg_size
);
553 weak_alias (__regerror
, regerror
)
558 free_dfa_content (re_dfa_t
*dfa
)
562 re_free (dfa
->subexps
);
564 for (i
= 0; i
< dfa
->nodes_len
; ++i
)
566 re_token_t
*node
= dfa
->nodes
+ i
;
567 #ifdef RE_ENABLE_I18N
568 if (node
->type
== COMPLEX_BRACKET
&& node
->duplicated
== 0)
569 free_charset (node
->opr
.mbcset
);
571 #endif /* RE_ENABLE_I18N */
572 if (node
->type
== SIMPLE_BRACKET
&& node
->duplicated
== 0)
573 re_free (node
->opr
.sbcset
);
575 re_free (dfa
->nexts
);
576 for (i
= 0; i
< dfa
->nodes_len
; ++i
)
578 if (dfa
->eclosures
!= NULL
)
579 re_node_set_free (dfa
->eclosures
+ i
);
580 if (dfa
->inveclosures
!= NULL
)
581 re_node_set_free (dfa
->inveclosures
+ i
);
582 if (dfa
->edests
!= NULL
)
583 re_node_set_free (dfa
->edests
+ i
);
585 re_free (dfa
->edests
);
586 re_free (dfa
->eclosures
);
587 re_free (dfa
->inveclosures
);
588 re_free (dfa
->nodes
);
590 for (i
= 0; i
<= dfa
->state_hash_mask
; ++i
)
592 struct re_state_table_entry
*entry
= dfa
->state_table
+ i
;
593 for (j
= 0; j
< entry
->num
; ++j
)
595 re_dfastate_t
*state
= entry
->array
[j
];
598 re_free (entry
->array
);
600 re_free (dfa
->state_table
);
602 if (dfa
->word_char
!= NULL
)
603 re_free (dfa
->word_char
);
605 re_free (dfa
->re_str
);
612 /* Free dynamically allocated space used by PREG. */
618 re_dfa_t
*dfa
= (re_dfa_t
*) preg
->buffer
;
619 if (BE (dfa
!= NULL
, 1))
620 free_dfa_content (dfa
);
622 re_free (preg
->fastmap
);
625 weak_alias (__regfree
, regfree
)
628 /* Entry points compatible with 4.2 BSD regex library. We don't define
629 them unless specifically requested. */
631 #if defined _REGEX_RE_COMP || defined _LIBC
633 /* BSD has one and only one pattern buffer. */
634 static struct re_pattern_buffer re_comp_buf
;
638 /* Make these definitions weak in libc, so POSIX programs can redefine
639 these names if they don't use our functions, and still use
640 regcomp/regexec above without link errors. */
651 if (!re_comp_buf
.buffer
)
652 return gettext ("No previous regular expression");
656 if (re_comp_buf
.buffer
)
658 fastmap
= re_comp_buf
.fastmap
;
659 re_comp_buf
.fastmap
= NULL
;
660 __regfree (&re_comp_buf
);
661 memset (&re_comp_buf
, '\0', sizeof (re_comp_buf
));
662 re_comp_buf
.fastmap
= fastmap
;
665 if (re_comp_buf
.fastmap
== NULL
)
667 re_comp_buf
.fastmap
= (char *) malloc (SBC_MAX
);
668 if (re_comp_buf
.fastmap
== NULL
)
669 return (char *) gettext (__re_error_msgid
670 + __re_error_msgid_idx
[(int) REG_ESPACE
]);
673 /* Since `re_exec' always passes NULL for the `regs' argument, we
674 don't need to initialize the pattern buffer fields which affect it. */
676 /* Match anchors at newlines. */
677 re_comp_buf
.newline_anchor
= 1;
679 ret
= re_compile_internal (&re_comp_buf
, s
, strlen (s
), re_syntax_options
);
684 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
685 return (char *) gettext (__re_error_msgid
+ __re_error_msgid_idx
[(int) ret
]);
689 libc_freeres_fn (free_mem
)
691 __regfree (&re_comp_buf
);
695 #endif /* _REGEX_RE_COMP */
697 /* Internal entry point.
698 Compile the regular expression PATTERN, whose length is LENGTH.
699 SYNTAX indicate regular expression's syntax. */
702 re_compile_internal (preg
, pattern
, length
, syntax
)
704 const char * pattern
;
708 reg_errcode_t err
= REG_NOERROR
;
712 /* Initialize the pattern buffer. */
713 preg
->fastmap_accurate
= 0;
714 preg
->syntax
= syntax
;
715 preg
->not_bol
= preg
->not_eol
= 0;
718 preg
->can_be_null
= 0;
719 preg
->regs_allocated
= REGS_UNALLOCATED
;
721 /* Initialize the dfa. */
722 dfa
= (re_dfa_t
*) preg
->buffer
;
723 if (preg
->allocated
< sizeof (re_dfa_t
))
725 /* If zero allocated, but buffer is non-null, try to realloc
726 enough space. This loses if buffer's address is bogus, but
727 that is the user's responsibility. If ->buffer is NULL this
728 is a simple allocation. */
729 dfa
= re_realloc (preg
->buffer
, re_dfa_t
, 1);
732 preg
->allocated
= sizeof (re_dfa_t
);
734 preg
->buffer
= (unsigned char *) dfa
;
735 preg
->used
= sizeof (re_dfa_t
);
737 err
= init_dfa (dfa
, length
);
738 if (BE (err
!= REG_NOERROR
, 0))
746 dfa
->re_str
= re_malloc (char, length
+ 1);
747 strncpy (dfa
->re_str
, pattern
, length
+ 1);
750 err
= re_string_construct (®exp
, pattern
, length
, preg
->translate
,
751 syntax
& RE_ICASE
, dfa
->mb_cur_max
,
753 if (BE (err
!= REG_NOERROR
, 0))
761 /* Parse the regular expression, and build a structure tree. */
763 dfa
->str_tree
= parse (®exp
, preg
, syntax
, &err
);
764 if (BE (dfa
->str_tree
== NULL
, 0))
765 goto re_compile_internal_free_return
;
767 #ifdef RE_ENABLE_I18N
768 /* If possible, do searching in single byte encoding to speed things up. */
769 if (dfa
->is_utf8
&& !(syntax
& RE_ICASE
))
773 /* Analyze the tree and collect information which is necessary to
776 if (BE (err
!= REG_NOERROR
, 0))
777 goto re_compile_internal_free_return
;
779 /* Then create the initial state of the dfa. */
780 err
= create_initial_state (dfa
);
782 /* Release work areas. */
783 free_workarea_compile (preg
);
784 re_string_destruct (®exp
);
786 if (BE (err
!= REG_NOERROR
, 0))
788 re_compile_internal_free_return
:
789 free_dfa_content (dfa
);
797 /* Initialize DFA. We use the length of the regular expression PAT_LEN
798 as the initial length of some arrays. */
801 init_dfa (dfa
, pat_len
)
807 memset (dfa
, '\0', sizeof (re_dfa_t
));
809 dfa
->nodes_alloc
= pat_len
+ 1;
810 dfa
->nodes
= re_malloc (re_token_t
, dfa
->nodes_alloc
);
812 dfa
->states_alloc
= pat_len
+ 1;
814 /* table_size = 2 ^ ceil(log pat_len) */
815 for (table_size
= 1; table_size
> 0; table_size
<<= 1)
816 if (table_size
> pat_len
)
819 dfa
->state_table
= calloc (sizeof (struct re_state_table_entry
), table_size
);
820 dfa
->state_hash_mask
= table_size
- 1;
822 dfa
->subexps_alloc
= 1;
823 dfa
->subexps
= re_malloc (re_subexp_t
, dfa
->subexps_alloc
);
824 dfa
->word_char
= NULL
;
826 dfa
->mb_cur_max
= MB_CUR_MAX
;
828 if (dfa
->mb_cur_max
> 1
829 && strcmp (_NL_CURRENT (LC_CTYPE
, _NL_CTYPE_CODESET_NAME
), "UTF-8") == 0)
833 if (BE (dfa
->nodes
== NULL
|| dfa
->state_table
== NULL
834 || dfa
->subexps
== NULL
, 0))
836 /* We don't bother to free anything which was allocated. Very
837 soon the process will go down anyway. */
839 dfa
->state_table
= NULL
;
846 /* Initialize WORD_CHAR table, which indicate which character is
847 "word". In this case "word" means that it is the word construction
848 character used by some operators like "\<", "\>", etc. */
855 dfa
->word_char
= (re_bitset_ptr_t
) calloc (sizeof (bitset
), 1);
856 if (BE (dfa
->word_char
== NULL
, 0))
858 for (i
= 0, ch
= 0; i
< BITSET_UINTS
; ++i
)
859 for (j
= 0; j
< UINT_BITS
; ++j
, ++ch
)
860 if (isalnum (ch
) || ch
== '_')
861 dfa
->word_char
[i
] |= 1 << j
;
865 /* Free the work area which are only used while compiling. */
868 free_workarea_compile (preg
)
871 re_dfa_t
*dfa
= (re_dfa_t
*) preg
->buffer
;
872 free_bin_tree (dfa
->str_tree
);
873 dfa
->str_tree
= NULL
;
874 re_free (dfa
->org_indices
);
875 dfa
->org_indices
= NULL
;
878 /* Create initial states for all contexts. */
881 create_initial_state (dfa
)
886 re_node_set init_nodes
;
888 /* Initial states have the epsilon closure of the node which is
889 the first node of the regular expression. */
890 first
= dfa
->str_tree
->first
;
891 dfa
->init_node
= first
;
892 err
= re_node_set_init_copy (&init_nodes
, dfa
->eclosures
+ first
);
893 if (BE (err
!= REG_NOERROR
, 0))
896 /* The back-references which are in initial states can epsilon transit,
897 since in this case all of the subexpressions can be null.
898 Then we add epsilon closures of the nodes which are the next nodes of
899 the back-references. */
900 if (dfa
->nbackref
> 0)
901 for (i
= 0; i
< init_nodes
.nelem
; ++i
)
903 int node_idx
= init_nodes
.elems
[i
];
904 re_token_type_t type
= dfa
->nodes
[node_idx
].type
;
907 if (type
!= OP_BACK_REF
)
909 for (clexp_idx
= 0; clexp_idx
< init_nodes
.nelem
; ++clexp_idx
)
911 re_token_t
*clexp_node
;
912 clexp_node
= dfa
->nodes
+ init_nodes
.elems
[clexp_idx
];
913 if (clexp_node
->type
== OP_CLOSE_SUBEXP
914 && clexp_node
->opr
.idx
+ 1 == dfa
->nodes
[node_idx
].opr
.idx
)
917 if (clexp_idx
== init_nodes
.nelem
)
920 if (type
== OP_BACK_REF
)
922 int dest_idx
= dfa
->edests
[node_idx
].elems
[0];
923 if (!re_node_set_contains (&init_nodes
, dest_idx
))
925 re_node_set_merge (&init_nodes
, dfa
->eclosures
+ dest_idx
);
931 /* It must be the first time to invoke acquire_state. */
932 dfa
->init_state
= re_acquire_state_context (&err
, dfa
, &init_nodes
, 0);
933 /* We don't check ERR here, since the initial state must not be NULL. */
934 if (BE (dfa
->init_state
== NULL
, 0))
936 if (dfa
->init_state
->has_constraint
)
938 dfa
->init_state_word
= re_acquire_state_context (&err
, dfa
, &init_nodes
,
940 dfa
->init_state_nl
= re_acquire_state_context (&err
, dfa
, &init_nodes
,
942 dfa
->init_state_begbuf
= re_acquire_state_context (&err
, dfa
,
946 if (BE (dfa
->init_state_word
== NULL
|| dfa
->init_state_nl
== NULL
947 || dfa
->init_state_begbuf
== NULL
, 0))
951 dfa
->init_state_word
= dfa
->init_state_nl
952 = dfa
->init_state_begbuf
= dfa
->init_state
;
954 re_node_set_free (&init_nodes
);
958 #ifdef RE_ENABLE_I18N
959 /* If it is possible to do searching in single byte encoding instead of UTF-8
960 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
961 DFA nodes where needed. */
969 for (node
= 0; node
< dfa
->nodes_len
; ++node
)
970 switch (dfa
->nodes
[node
].type
)
973 /* Chars >= 0x80 are optimizable in some cases (e.g. when not
974 followed by DUP operator, not in bracket etc.).
975 For now punt on them all. */
976 if (dfa
->nodes
[node
].opr
.c
>= 0x80)
980 switch (dfa
->nodes
[node
].opr
.idx
)
988 /* Word anchors etc. cannot be handled. */
996 case OP_DUP_ASTERISK
:
997 case OP_DUP_QUESTION
:
1000 case OP_CLOSE_SUBEXP
:
1006 /* The search can be in single byte locale. */
1007 dfa
->mb_cur_max
= 1;
1009 dfa
->has_mb_node
= dfa
->nbackref
> 0;
1013 /* Analyze the structure tree, and calculate "first", "next", "edest",
1014 "eclosure", and "inveclosure". */
1016 static reg_errcode_t
1023 /* Allocate arrays. */
1024 dfa
->nexts
= re_malloc (int, dfa
->nodes_alloc
);
1025 dfa
->org_indices
= re_malloc (int, dfa
->nodes_alloc
);
1026 dfa
->edests
= re_malloc (re_node_set
, dfa
->nodes_alloc
);
1027 dfa
->eclosures
= re_malloc (re_node_set
, dfa
->nodes_alloc
);
1028 dfa
->inveclosures
= re_malloc (re_node_set
, dfa
->nodes_alloc
);
1029 if (BE (dfa
->nexts
== NULL
|| dfa
->org_indices
== NULL
|| dfa
->edests
== NULL
1030 || dfa
->eclosures
== NULL
|| dfa
->inveclosures
== NULL
, 0))
1032 /* Initialize them. */
1033 for (i
= 0; i
< dfa
->nodes_len
; ++i
)
1036 re_node_set_init_empty (dfa
->edests
+ i
);
1037 re_node_set_init_empty (dfa
->eclosures
+ i
);
1038 re_node_set_init_empty (dfa
->inveclosures
+ i
);
1041 ret
= analyze_tree (dfa
, dfa
->str_tree
);
1042 if (BE (ret
== REG_NOERROR
, 1))
1044 ret
= calc_eclosure (dfa
);
1045 if (ret
== REG_NOERROR
)
1046 calc_inveclosure (dfa
);
1051 /* Helper functions for analyze.
1052 This function calculate "first", "next", and "edest" for the subtree
1053 whose root is NODE. */
1055 static reg_errcode_t
1056 analyze_tree (dfa
, node
)
1061 if (node
->first
== -1)
1062 calc_first (dfa
, node
);
1063 if (node
->next
== -1)
1064 calc_next (dfa
, node
);
1065 if (node
->eclosure
.nelem
== 0)
1066 calc_epsdest (dfa
, node
);
1067 /* Calculate "first" etc. for the left child. */
1068 if (node
->left
!= NULL
)
1070 ret
= analyze_tree (dfa
, node
->left
);
1071 if (BE (ret
!= REG_NOERROR
, 0))
1074 /* Calculate "first" etc. for the right child. */
1075 if (node
->right
!= NULL
)
1077 ret
= analyze_tree (dfa
, node
->right
);
1078 if (BE (ret
!= REG_NOERROR
, 0))
1084 /* Calculate "first" for the node NODE. */
1086 calc_first (dfa
, node
)
1091 idx
= node
->node_idx
;
1092 type
= (node
->type
== 0) ? dfa
->nodes
[idx
].type
: node
->type
;
1097 case OP_OPEN_BRACKET
:
1098 case OP_CLOSE_BRACKET
:
1099 case OP_OPEN_DUP_NUM
:
1100 case OP_CLOSE_DUP_NUM
:
1101 case OP_NON_MATCH_LIST
:
1102 case OP_OPEN_COLL_ELEM
:
1103 case OP_CLOSE_COLL_ELEM
:
1104 case OP_OPEN_EQUIV_CLASS
:
1105 case OP_CLOSE_EQUIV_CLASS
:
1106 case OP_OPEN_CHAR_CLASS
:
1107 case OP_CLOSE_CHAR_CLASS
:
1108 /* These must not be appeared here. */
1114 case OP_DUP_ASTERISK
:
1115 case OP_DUP_QUESTION
:
1116 #ifdef RE_ENABLE_I18N
1117 case COMPLEX_BRACKET
:
1118 #endif /* RE_ENABLE_I18N */
1119 case SIMPLE_BRACKET
:
1122 case OP_OPEN_SUBEXP
:
1123 case OP_CLOSE_SUBEXP
:
1128 assert (node
->left
!= NULL
);
1130 if (node
->left
->first
== -1)
1131 calc_first (dfa
, node
->left
);
1132 node
->first
= node
->left
->first
;
1137 /* else fall through */
1140 assert (node
->left
!= NULL
);
1142 if (node
->left
->first
== -1)
1143 calc_first (dfa
, node
->left
);
1144 node
->first
= node
->left
->first
;
1149 /* Calculate "next" for the node NODE. */
1152 calc_next (dfa
, node
)
1157 bin_tree_t
*parent
= node
->parent
;
1161 idx
= node
->node_idx
;
1162 if (node
->type
== 0)
1163 dfa
->nexts
[idx
] = node
->next
;
1167 idx
= parent
->node_idx
;
1168 type
= (parent
->type
== 0) ? dfa
->nodes
[idx
].type
: parent
->type
;
1172 case OP_DUP_ASTERISK
:
1177 if (parent
->left
== node
)
1179 if (parent
->right
->first
== -1)
1180 calc_first (dfa
, parent
->right
);
1181 node
->next
= parent
->right
->first
;
1184 /* else fall through */
1186 if (parent
->next
== -1)
1187 calc_next (dfa
, parent
);
1188 node
->next
= parent
->next
;
1191 idx
= node
->node_idx
;
1192 if (node
->type
== 0)
1193 dfa
->nexts
[idx
] = node
->next
;
1196 /* Calculate "edest" for the node NODE. */
1199 calc_epsdest (dfa
, node
)
1204 idx
= node
->node_idx
;
1205 if (node
->type
== 0)
1207 if (dfa
->nodes
[idx
].type
== OP_DUP_ASTERISK
1208 || dfa
->nodes
[idx
].type
== OP_DUP_PLUS
1209 || dfa
->nodes
[idx
].type
== OP_DUP_QUESTION
)
1211 if (node
->left
->first
== -1)
1212 calc_first (dfa
, node
->left
);
1213 if (node
->next
== -1)
1214 calc_next (dfa
, node
);
1215 re_node_set_init_2 (dfa
->edests
+ idx
, node
->left
->first
,
1218 else if (dfa
->nodes
[idx
].type
== OP_ALT
)
1221 if (node
->left
!= NULL
)
1223 if (node
->left
->first
== -1)
1224 calc_first (dfa
, node
->left
);
1225 left
= node
->left
->first
;
1229 if (node
->next
== -1)
1230 calc_next (dfa
, node
);
1233 if (node
->right
!= NULL
)
1235 if (node
->right
->first
== -1)
1236 calc_first (dfa
, node
->right
);
1237 right
= node
->right
->first
;
1241 if (node
->next
== -1)
1242 calc_next (dfa
, node
);
1245 re_node_set_init_2 (dfa
->edests
+ idx
, left
, right
);
1247 else if (dfa
->nodes
[idx
].type
== ANCHOR
1248 || dfa
->nodes
[idx
].type
== OP_OPEN_SUBEXP
1249 || dfa
->nodes
[idx
].type
== OP_CLOSE_SUBEXP
1250 || dfa
->nodes
[idx
].type
== OP_BACK_REF
)
1251 re_node_set_init_1 (dfa
->edests
+ idx
, node
->next
);
1253 assert (!IS_EPSILON_NODE (dfa
->nodes
[idx
].type
));
1257 /* Duplicate the epsilon closure of the node ROOT_NODE.
1258 Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1259 to their own constraint. */
1261 static reg_errcode_t
1262 duplicate_node_closure (dfa
, top_org_node
, top_clone_node
, root_node
,
1265 int top_org_node
, top_clone_node
, root_node
;
1266 unsigned int init_constraint
;
1269 int org_node
, clone_node
, ret
;
1270 unsigned int constraint
= init_constraint
;
1271 for (org_node
= top_org_node
, clone_node
= top_clone_node
;;)
1273 int org_dest
, clone_dest
;
1274 if (dfa
->nodes
[org_node
].type
== OP_BACK_REF
)
1276 /* If the back reference epsilon-transit, its destination must
1277 also have the constraint. Then duplicate the epsilon closure
1278 of the destination of the back reference, and store it in
1279 edests of the back reference. */
1280 org_dest
= dfa
->nexts
[org_node
];
1281 re_node_set_empty (dfa
->edests
+ clone_node
);
1282 err
= duplicate_node (&clone_dest
, dfa
, org_dest
, constraint
);
1283 if (BE (err
!= REG_NOERROR
, 0))
1285 dfa
->nexts
[clone_node
] = dfa
->nexts
[org_node
];
1286 ret
= re_node_set_insert (dfa
->edests
+ clone_node
, clone_dest
);
1287 if (BE (ret
< 0, 0))
1290 else if (dfa
->edests
[org_node
].nelem
== 0)
1292 /* In case of the node can't epsilon-transit, don't duplicate the
1293 destination and store the original destination as the
1294 destination of the node. */
1295 dfa
->nexts
[clone_node
] = dfa
->nexts
[org_node
];
1298 else if (dfa
->edests
[org_node
].nelem
== 1)
1300 /* In case of the node can epsilon-transit, and it has only one
1302 org_dest
= dfa
->edests
[org_node
].elems
[0];
1303 re_node_set_empty (dfa
->edests
+ clone_node
);
1304 if (dfa
->nodes
[org_node
].type
== ANCHOR
)
1306 /* In case of the node has another constraint, append it. */
1307 if (org_node
== root_node
&& clone_node
!= org_node
)
1309 /* ...but if the node is root_node itself, it means the
1310 epsilon closure have a loop, then tie it to the
1311 destination of the root_node. */
1312 ret
= re_node_set_insert (dfa
->edests
+ clone_node
,
1314 if (BE (ret
< 0, 0))
1318 constraint
|= dfa
->nodes
[org_node
].opr
.ctx_type
;
1320 err
= duplicate_node (&clone_dest
, dfa
, org_dest
, constraint
);
1321 if (BE (err
!= REG_NOERROR
, 0))
1323 ret
= re_node_set_insert (dfa
->edests
+ clone_node
, clone_dest
);
1324 if (BE (ret
< 0, 0))
1327 else /* dfa->edests[org_node].nelem == 2 */
1329 /* In case of the node can epsilon-transit, and it has two
1330 destinations. E.g. '|', '*', '+', '?'. */
1331 org_dest
= dfa
->edests
[org_node
].elems
[0];
1332 re_node_set_empty (dfa
->edests
+ clone_node
);
1333 /* Search for a duplicated node which satisfies the constraint. */
1334 clone_dest
= search_duplicated_node (dfa
, org_dest
, constraint
);
1335 if (clone_dest
== -1)
1337 /* There are no such a duplicated node, create a new one. */
1338 err
= duplicate_node (&clone_dest
, dfa
, org_dest
, constraint
);
1339 if (BE (err
!= REG_NOERROR
, 0))
1341 ret
= re_node_set_insert (dfa
->edests
+ clone_node
, clone_dest
);
1342 if (BE (ret
< 0, 0))
1344 err
= duplicate_node_closure (dfa
, org_dest
, clone_dest
,
1345 root_node
, constraint
);
1346 if (BE (err
!= REG_NOERROR
, 0))
1351 /* There are a duplicated node which satisfy the constraint,
1352 use it to avoid infinite loop. */
1353 ret
= re_node_set_insert (dfa
->edests
+ clone_node
, clone_dest
);
1354 if (BE (ret
< 0, 0))
1358 org_dest
= dfa
->edests
[org_node
].elems
[1];
1359 err
= duplicate_node (&clone_dest
, dfa
, org_dest
, constraint
);
1360 if (BE (err
!= REG_NOERROR
, 0))
1362 ret
= re_node_set_insert (dfa
->edests
+ clone_node
, clone_dest
);
1363 if (BE (ret
< 0, 0))
1366 org_node
= org_dest
;
1367 clone_node
= clone_dest
;
1372 /* Search for a node which is duplicated from the node ORG_NODE, and
1373 satisfies the constraint CONSTRAINT. */
1376 search_duplicated_node (dfa
, org_node
, constraint
)
1379 unsigned int constraint
;
1382 for (idx
= dfa
->nodes_len
- 1; dfa
->nodes
[idx
].duplicated
&& idx
> 0; --idx
)
1384 if (org_node
== dfa
->org_indices
[idx
]
1385 && constraint
== dfa
->nodes
[idx
].constraint
)
1386 return idx
; /* Found. */
1388 return -1; /* Not found. */
1391 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
1392 The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
1393 otherwise return the error code. */
1395 static reg_errcode_t
1396 duplicate_node (new_idx
, dfa
, org_idx
, constraint
)
1398 int *new_idx
, org_idx
;
1399 unsigned int constraint
;
1404 dup
= dfa
->nodes
[org_idx
];
1405 dup_idx
= re_dfa_add_node (dfa
, dup
, 1);
1406 if (BE (dup_idx
== -1, 0))
1408 dfa
->nodes
[dup_idx
].constraint
= constraint
;
1409 if (dfa
->nodes
[org_idx
].type
== ANCHOR
)
1410 dfa
->nodes
[dup_idx
].constraint
|= dfa
->nodes
[org_idx
].opr
.ctx_type
;
1411 dfa
->nodes
[dup_idx
].duplicated
= 1;
1412 re_node_set_init_empty (dfa
->edests
+ dup_idx
);
1413 re_node_set_init_empty (dfa
->eclosures
+ dup_idx
);
1414 re_node_set_init_empty (dfa
->inveclosures
+ dup_idx
);
1416 /* Store the index of the original node. */
1417 dfa
->org_indices
[dup_idx
] = org_idx
;
1423 calc_inveclosure (dfa
)
1427 for (src
= 0; src
< dfa
->nodes_len
; ++src
)
1429 for (idx
= 0; idx
< dfa
->eclosures
[src
].nelem
; ++idx
)
1431 dest
= dfa
->eclosures
[src
].elems
[idx
];
1432 re_node_set_insert (dfa
->inveclosures
+ dest
, src
);
1437 /* Calculate "eclosure" for all the node in DFA. */
1439 static reg_errcode_t
1443 int node_idx
, incomplete
;
1445 assert (dfa
->nodes_len
> 0);
1448 /* For each nodes, calculate epsilon closure. */
1449 for (node_idx
= 0; ; ++node_idx
)
1452 re_node_set eclosure_elem
;
1453 if (node_idx
== dfa
->nodes_len
)
1462 assert (dfa
->eclosures
[node_idx
].nelem
!= -1);
1464 /* If we have already calculated, skip it. */
1465 if (dfa
->eclosures
[node_idx
].nelem
!= 0)
1467 /* Calculate epsilon closure of `node_idx'. */
1468 err
= calc_eclosure_iter (&eclosure_elem
, dfa
, node_idx
, 1);
1469 if (BE (err
!= REG_NOERROR
, 0))
1472 if (dfa
->eclosures
[node_idx
].nelem
== 0)
1475 re_node_set_free (&eclosure_elem
);
1481 /* Calculate epsilon closure of NODE. */
1483 static reg_errcode_t
1484 calc_eclosure_iter (new_set
, dfa
, node
, root
)
1485 re_node_set
*new_set
;
1490 unsigned int constraint
;
1492 re_node_set eclosure
;
1494 err
= re_node_set_alloc (&eclosure
, dfa
->edests
[node
].nelem
+ 1);
1495 if (BE (err
!= REG_NOERROR
, 0))
1498 /* This indicates that we are calculating this node now.
1499 We reference this value to avoid infinite loop. */
1500 dfa
->eclosures
[node
].nelem
= -1;
1502 constraint
= ((dfa
->nodes
[node
].type
== ANCHOR
)
1503 ? dfa
->nodes
[node
].opr
.ctx_type
: 0);
1504 /* If the current node has constraints, duplicate all nodes.
1505 Since they must inherit the constraints. */
1506 if (constraint
&& !dfa
->nodes
[dfa
->edests
[node
].elems
[0]].duplicated
)
1508 int org_node
, cur_node
;
1509 org_node
= cur_node
= node
;
1510 err
= duplicate_node_closure (dfa
, node
, node
, node
, constraint
);
1511 if (BE (err
!= REG_NOERROR
, 0))
1515 /* Expand each epsilon destination nodes. */
1516 if (IS_EPSILON_NODE(dfa
->nodes
[node
].type
))
1517 for (i
= 0; i
< dfa
->edests
[node
].nelem
; ++i
)
1519 re_node_set eclosure_elem
;
1520 int edest
= dfa
->edests
[node
].elems
[i
];
1521 /* If calculating the epsilon closure of `edest' is in progress,
1522 return intermediate result. */
1523 if (dfa
->eclosures
[edest
].nelem
== -1)
1528 /* If we haven't calculated the epsilon closure of `edest' yet,
1529 calculate now. Otherwise use calculated epsilon closure. */
1530 if (dfa
->eclosures
[edest
].nelem
== 0)
1532 err
= calc_eclosure_iter (&eclosure_elem
, dfa
, edest
, 0);
1533 if (BE (err
!= REG_NOERROR
, 0))
1537 eclosure_elem
= dfa
->eclosures
[edest
];
1538 /* Merge the epsilon closure of `edest'. */
1539 re_node_set_merge (&eclosure
, &eclosure_elem
);
1540 /* If the epsilon closure of `edest' is incomplete,
1541 the epsilon closure of this node is also incomplete. */
1542 if (dfa
->eclosures
[edest
].nelem
== 0)
1545 re_node_set_free (&eclosure_elem
);
1549 /* Epsilon closures include itself. */
1550 re_node_set_insert (&eclosure
, node
);
1551 if (incomplete
&& !root
)
1552 dfa
->eclosures
[node
].nelem
= 0;
1554 dfa
->eclosures
[node
] = eclosure
;
1555 *new_set
= eclosure
;
1559 /* Functions for token which are used in the parser. */
1561 /* Fetch a token from INPUT.
1562 We must not use this function inside bracket expressions. */
1565 fetch_token (input
, syntax
)
1567 reg_syntax_t syntax
;
1571 consumed_byte
= peek_token (&token
, input
, syntax
);
1572 re_string_skip_bytes (input
, consumed_byte
);
1576 /* Peek a token from INPUT, and return the length of the token.
1577 We must not use this function inside bracket expressions. */
1580 peek_token (token
, input
, syntax
)
1583 reg_syntax_t syntax
;
1587 if (re_string_eoi (input
))
1589 token
->type
= END_OF_RE
;
1593 c
= re_string_peek_byte (input
, 0);
1596 #ifdef RE_ENABLE_I18N
1597 token
->mb_partial
= 0;
1598 if (input
->mb_cur_max
> 1 &&
1599 !re_string_first_byte (input
, re_string_cur_idx (input
)))
1601 token
->type
= CHARACTER
;
1602 token
->mb_partial
= 1;
1609 if (re_string_cur_idx (input
) + 1 >= re_string_length (input
))
1611 token
->type
= BACK_SLASH
;
1615 c2
= re_string_peek_byte_case (input
, 1);
1617 token
->type
= CHARACTER
;
1621 if (!(syntax
& RE_LIMITED_OPS
) && !(syntax
& RE_NO_BK_VBAR
))
1622 token
->type
= OP_ALT
;
1624 case '1': case '2': case '3': case '4': case '5':
1625 case '6': case '7': case '8': case '9':
1626 if (!(syntax
& RE_NO_BK_REFS
))
1628 token
->type
= OP_BACK_REF
;
1629 token
->opr
.idx
= c2
- '0';
1633 if (!(syntax
& RE_NO_GNU_OPS
))
1635 token
->type
= ANCHOR
;
1636 token
->opr
.idx
= WORD_FIRST
;
1640 if (!(syntax
& RE_NO_GNU_OPS
))
1642 token
->type
= ANCHOR
;
1643 token
->opr
.idx
= WORD_LAST
;
1647 if (!(syntax
& RE_NO_GNU_OPS
))
1649 token
->type
= ANCHOR
;
1650 token
->opr
.idx
= WORD_DELIM
;
1654 if (!(syntax
& RE_NO_GNU_OPS
))
1656 token
->type
= ANCHOR
;
1657 token
->opr
.idx
= INSIDE_WORD
;
1661 if (!(syntax
& RE_NO_GNU_OPS
))
1662 token
->type
= OP_WORD
;
1665 if (!(syntax
& RE_NO_GNU_OPS
))
1666 token
->type
= OP_NOTWORD
;
1669 if (!(syntax
& RE_NO_GNU_OPS
))
1670 token
->type
= OP_SPACE
;
1673 if (!(syntax
& RE_NO_GNU_OPS
))
1674 token
->type
= OP_NOTSPACE
;
1677 if (!(syntax
& RE_NO_GNU_OPS
))
1679 token
->type
= ANCHOR
;
1680 token
->opr
.idx
= BUF_FIRST
;
1684 if (!(syntax
& RE_NO_GNU_OPS
))
1686 token
->type
= ANCHOR
;
1687 token
->opr
.idx
= BUF_LAST
;
1691 if (!(syntax
& RE_NO_BK_PARENS
))
1692 token
->type
= OP_OPEN_SUBEXP
;
1695 if (!(syntax
& RE_NO_BK_PARENS
))
1696 token
->type
= OP_CLOSE_SUBEXP
;
1699 if (!(syntax
& RE_LIMITED_OPS
) && (syntax
& RE_BK_PLUS_QM
))
1700 token
->type
= OP_DUP_PLUS
;
1703 if (!(syntax
& RE_LIMITED_OPS
) && (syntax
& RE_BK_PLUS_QM
))
1704 token
->type
= OP_DUP_QUESTION
;
1707 if ((syntax
& RE_INTERVALS
) && (!(syntax
& RE_NO_BK_BRACES
)))
1708 token
->type
= OP_OPEN_DUP_NUM
;
1711 if ((syntax
& RE_INTERVALS
) && (!(syntax
& RE_NO_BK_BRACES
)))
1712 token
->type
= OP_CLOSE_DUP_NUM
;
1720 token
->type
= CHARACTER
;
1724 if (syntax
& RE_NEWLINE_ALT
)
1725 token
->type
= OP_ALT
;
1728 if (!(syntax
& RE_LIMITED_OPS
) && (syntax
& RE_NO_BK_VBAR
))
1729 token
->type
= OP_ALT
;
1732 token
->type
= OP_DUP_ASTERISK
;
1735 if (!(syntax
& RE_LIMITED_OPS
) && !(syntax
& RE_BK_PLUS_QM
))
1736 token
->type
= OP_DUP_PLUS
;
1739 if (!(syntax
& RE_LIMITED_OPS
) && !(syntax
& RE_BK_PLUS_QM
))
1740 token
->type
= OP_DUP_QUESTION
;
1743 if ((syntax
& RE_INTERVALS
) && (syntax
& RE_NO_BK_BRACES
))
1744 token
->type
= OP_OPEN_DUP_NUM
;
1747 if ((syntax
& RE_INTERVALS
) && (syntax
& RE_NO_BK_BRACES
))
1748 token
->type
= OP_CLOSE_DUP_NUM
;
1751 if (syntax
& RE_NO_BK_PARENS
)
1752 token
->type
= OP_OPEN_SUBEXP
;
1755 if (syntax
& RE_NO_BK_PARENS
)
1756 token
->type
= OP_CLOSE_SUBEXP
;
1759 token
->type
= OP_OPEN_BRACKET
;
1762 token
->type
= OP_PERIOD
;
1765 if (!(syntax
& (RE_CONTEXT_INDEP_ANCHORS
| RE_CARET_ANCHORS_HERE
)) &&
1766 re_string_cur_idx (input
) != 0)
1768 char prev
= re_string_peek_byte (input
, -1);
1769 if (!(syntax
& RE_NEWLINE_ALT
) || prev
!= '\n')
1772 token
->type
= ANCHOR
;
1773 token
->opr
.idx
= LINE_FIRST
;
1776 if (!(syntax
& RE_CONTEXT_INDEP_ANCHORS
) &&
1777 re_string_cur_idx (input
) + 1 != re_string_length (input
))
1780 re_string_skip_bytes (input
, 1);
1781 peek_token (&next
, input
, syntax
);
1782 re_string_skip_bytes (input
, -1);
1783 if (next
.type
!= OP_ALT
&& next
.type
!= OP_CLOSE_SUBEXP
)
1786 token
->type
= ANCHOR
;
1787 token
->opr
.idx
= LINE_LAST
;
1795 /* Peek a token from INPUT, and return the length of the token.
1796 We must not use this function out of bracket expressions. */
1799 peek_token_bracket (token
, input
, syntax
)
1802 reg_syntax_t syntax
;
1805 if (re_string_eoi (input
))
1807 token
->type
= END_OF_RE
;
1810 c
= re_string_peek_byte (input
, 0);
1813 #ifdef RE_ENABLE_I18N
1814 if (input
->mb_cur_max
> 1 &&
1815 !re_string_first_byte (input
, re_string_cur_idx (input
)))
1817 token
->type
= CHARACTER
;
1820 #endif /* RE_ENABLE_I18N */
1822 if (c
== '\\' && (syntax
& RE_BACKSLASH_ESCAPE_IN_LISTS
))
1824 /* In this case, '\' escape a character. */
1826 re_string_skip_bytes (input
, 1);
1827 c2
= re_string_peek_byte (input
, 0);
1829 token
->type
= CHARACTER
;
1832 if (c
== '[') /* '[' is a special char in a bracket exps. */
1836 c2
= re_string_peek_byte (input
, 1);
1842 token
->type
= OP_OPEN_COLL_ELEM
;
1845 token
->type
= OP_OPEN_EQUIV_CLASS
;
1848 if (syntax
& RE_CHAR_CLASSES
)
1850 token
->type
= OP_OPEN_CHAR_CLASS
;
1853 /* else fall through. */
1855 token
->type
= CHARACTER
;
1865 token
->type
= OP_CHARSET_RANGE
;
1868 token
->type
= OP_CLOSE_BRACKET
;
1871 token
->type
= OP_NON_MATCH_LIST
;
1874 token
->type
= CHARACTER
;
1879 /* Functions for parser. */
1881 /* Entry point of the parser.
1882 Parse the regular expression REGEXP and return the structure tree.
1883 If an error is occured, ERR is set by error code, and return NULL.
1884 This function build the following tree, from regular expression <reg_exp>:
1890 CAT means concatenation.
1891 EOR means end of regular expression. */
1894 parse (regexp
, preg
, syntax
, err
)
1895 re_string_t
*regexp
;
1897 reg_syntax_t syntax
;
1900 re_dfa_t
*dfa
= (re_dfa_t
*) preg
->buffer
;
1901 bin_tree_t
*tree
, *eor
, *root
;
1902 re_token_t current_token
;
1904 current_token
= fetch_token (regexp
, syntax
| RE_CARET_ANCHORS_HERE
);
1905 tree
= parse_reg_exp (regexp
, preg
, ¤t_token
, syntax
, 0, err
);
1906 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
1908 new_idx
= re_dfa_add_node (dfa
, current_token
, 0);
1909 eor
= create_tree (NULL
, NULL
, 0, new_idx
);
1911 root
= create_tree (tree
, eor
, CONCAT
, 0);
1914 if (BE (new_idx
== -1 || eor
== NULL
|| root
== NULL
, 0))
1922 /* This function build the following tree, from regular expression
1923 <branch1>|<branch2>:
1929 ALT means alternative, which represents the operator `|'. */
1932 parse_reg_exp (regexp
, preg
, token
, syntax
, nest
, err
)
1933 re_string_t
*regexp
;
1936 reg_syntax_t syntax
;
1940 re_dfa_t
*dfa
= (re_dfa_t
*) preg
->buffer
;
1941 bin_tree_t
*tree
, *branch
= NULL
;
1943 tree
= parse_branch (regexp
, preg
, token
, syntax
, nest
, err
);
1944 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
1947 while (token
->type
== OP_ALT
)
1949 re_token_t alt_token
= *token
;
1950 new_idx
= re_dfa_add_node (dfa
, alt_token
, 0);
1951 *token
= fetch_token (regexp
, syntax
| RE_CARET_ANCHORS_HERE
);
1952 if (token
->type
!= OP_ALT
&& token
->type
!= END_OF_RE
1953 && (nest
== 0 || token
->type
!= OP_CLOSE_SUBEXP
))
1955 branch
= parse_branch (regexp
, preg
, token
, syntax
, nest
, err
);
1956 if (BE (*err
!= REG_NOERROR
&& branch
== NULL
, 0))
1958 free_bin_tree (tree
);
1964 tree
= create_tree (tree
, branch
, 0, new_idx
);
1965 if (BE (new_idx
== -1 || tree
== NULL
, 0))
1970 dfa
->has_plural_match
= 1;
1975 /* This function build the following tree, from regular expression
1982 CAT means concatenation. */
1985 parse_branch (regexp
, preg
, token
, syntax
, nest
, err
)
1986 re_string_t
*regexp
;
1989 reg_syntax_t syntax
;
1993 bin_tree_t
*tree
, *exp
;
1994 tree
= parse_expression (regexp
, preg
, token
, syntax
, nest
, err
);
1995 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
1998 while (token
->type
!= OP_ALT
&& token
->type
!= END_OF_RE
1999 && (nest
== 0 || token
->type
!= OP_CLOSE_SUBEXP
))
2001 exp
= parse_expression (regexp
, preg
, token
, syntax
, nest
, err
);
2002 if (BE (*err
!= REG_NOERROR
&& exp
== NULL
, 0))
2004 free_bin_tree (tree
);
2007 if (tree
!= NULL
&& exp
!= NULL
)
2009 tree
= create_tree (tree
, exp
, CONCAT
, 0);
2016 else if (tree
== NULL
)
2018 /* Otherwise exp == NULL, we don't need to create new tree. */
2023 /* This function build the following tree, from regular expression a*:
2030 parse_expression (regexp
, preg
, token
, syntax
, nest
, err
)
2031 re_string_t
*regexp
;
2034 reg_syntax_t syntax
;
2038 re_dfa_t
*dfa
= (re_dfa_t
*) preg
->buffer
;
2041 switch (token
->type
)
2044 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2045 tree
= create_tree (NULL
, NULL
, 0, new_idx
);
2046 if (BE (new_idx
== -1 || tree
== NULL
, 0))
2051 #ifdef RE_ENABLE_I18N
2052 if (dfa
->mb_cur_max
> 1)
2054 while (!re_string_eoi (regexp
)
2055 && !re_string_first_byte (regexp
, re_string_cur_idx (regexp
)))
2057 bin_tree_t
*mbc_remain
;
2058 *token
= fetch_token (regexp
, syntax
);
2059 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2060 mbc_remain
= create_tree (NULL
, NULL
, 0, new_idx
);
2061 tree
= create_tree (tree
, mbc_remain
, CONCAT
, 0);
2062 if (BE (new_idx
== -1 || mbc_remain
== NULL
|| tree
== NULL
, 0))
2071 case OP_OPEN_SUBEXP
:
2072 tree
= parse_sub_exp (regexp
, preg
, token
, syntax
, nest
+ 1, err
);
2073 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
2076 case OP_OPEN_BRACKET
:
2077 tree
= parse_bracket_exp (regexp
, dfa
, token
, syntax
, err
);
2078 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
2082 if (BE (preg
->re_nsub
< token
->opr
.idx
2083 || dfa
->subexps
[token
->opr
.idx
- 1].end
== -1, 0))
2088 dfa
->used_bkref_map
|= 1 << (token
->opr
.idx
- 1);
2089 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2090 tree
= create_tree (NULL
, NULL
, 0, new_idx
);
2091 if (BE (new_idx
== -1 || tree
== NULL
, 0))
2097 dfa
->has_mb_node
= 1;
2099 case OP_OPEN_DUP_NUM
:
2100 if (syntax
& RE_CONTEXT_INVALID_DUP
)
2106 case OP_DUP_ASTERISK
:
2108 case OP_DUP_QUESTION
:
2109 if (syntax
& RE_CONTEXT_INVALID_OPS
)
2114 else if (syntax
& RE_CONTEXT_INDEP_OPS
)
2116 *token
= fetch_token (regexp
, syntax
);
2117 return parse_expression (regexp
, preg
, token
, syntax
, nest
, err
);
2119 /* else fall through */
2120 case OP_CLOSE_SUBEXP
:
2121 if ((token
->type
== OP_CLOSE_SUBEXP
) &&
2122 !(syntax
& RE_UNMATCHED_RIGHT_PAREN_ORD
))
2127 /* else fall through */
2128 case OP_CLOSE_DUP_NUM
:
2129 /* We treat it as a normal character. */
2131 /* Then we can these characters as normal characters. */
2132 token
->type
= CHARACTER
;
2133 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2134 tree
= create_tree (NULL
, NULL
, 0, new_idx
);
2135 if (BE (new_idx
== -1 || tree
== NULL
, 0))
2142 if (dfa
->word_char
== NULL
)
2144 *err
= init_word_char (dfa
);
2145 if (BE (*err
!= REG_NOERROR
, 0))
2148 if (token
->opr
.ctx_type
== WORD_DELIM
)
2150 bin_tree_t
*tree_first
, *tree_last
;
2151 int idx_first
, idx_last
;
2152 token
->opr
.ctx_type
= WORD_FIRST
;
2153 idx_first
= re_dfa_add_node (dfa
, *token
, 0);
2154 tree_first
= create_tree (NULL
, NULL
, 0, idx_first
);
2155 token
->opr
.ctx_type
= WORD_LAST
;
2156 idx_last
= re_dfa_add_node (dfa
, *token
, 0);
2157 tree_last
= create_tree (NULL
, NULL
, 0, idx_last
);
2158 token
->type
= OP_ALT
;
2159 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2160 tree
= create_tree (tree_first
, tree_last
, 0, new_idx
);
2161 if (BE (idx_first
== -1 || idx_last
== -1 || new_idx
== -1
2162 || tree_first
== NULL
|| tree_last
== NULL
2163 || tree
== NULL
, 0))
2171 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2172 tree
= create_tree (NULL
, NULL
, 0, new_idx
);
2173 if (BE (new_idx
== -1 || tree
== NULL
, 0))
2179 /* We must return here, since ANCHORs can't be followed
2180 by repetition operators.
2181 eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
2182 it must not be "<ANCHOR(^)><REPEAT(*)>". */
2183 *token
= fetch_token (regexp
, syntax
);
2186 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2187 tree
= create_tree (NULL
, NULL
, 0, new_idx
);
2188 if (BE (new_idx
== -1 || tree
== NULL
, 0))
2193 if (dfa
->mb_cur_max
> 1)
2194 dfa
->has_mb_node
= 1;
2197 tree
= build_charclass_op (dfa
, regexp
->trans
, "alnum", "_", 0, err
);
2198 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
2202 tree
= build_charclass_op (dfa
, regexp
->trans
, "alnum", "_", 1, err
);
2203 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
2207 tree
= build_charclass_op (dfa
, regexp
->trans
, "space", "", 0, err
);
2208 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
2212 tree
= build_charclass_op (dfa
, regexp
->trans
, "space", "", 1, err
);
2213 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
2223 /* Must not happen? */
2229 *token
= fetch_token (regexp
, syntax
);
2231 while (token
->type
== OP_DUP_ASTERISK
|| token
->type
== OP_DUP_PLUS
2232 || token
->type
== OP_DUP_QUESTION
|| token
->type
== OP_OPEN_DUP_NUM
)
2234 tree
= parse_dup_op (tree
, regexp
, dfa
, token
, syntax
, err
);
2235 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
2237 /* In BRE consecutive duplications are not allowed. */
2238 if ((syntax
& RE_CONTEXT_INVALID_DUP
)
2239 && (token
->type
== OP_DUP_ASTERISK
2240 || token
->type
== OP_OPEN_DUP_NUM
))
2245 dfa
->has_plural_match
= 1;
2251 /* This function build the following tree, from regular expression
2259 parse_sub_exp (regexp
, preg
, token
, syntax
, nest
, err
)
2260 re_string_t
*regexp
;
2263 reg_syntax_t syntax
;
2267 re_dfa_t
*dfa
= (re_dfa_t
*) preg
->buffer
;
2268 bin_tree_t
*tree
, *left_par
, *right_par
;
2271 cur_nsub
= preg
->re_nsub
++;
2272 if (dfa
->subexps_alloc
< preg
->re_nsub
)
2274 re_subexp_t
*new_array
;
2275 dfa
->subexps_alloc
*= 2;
2276 new_array
= re_realloc (dfa
->subexps
, re_subexp_t
, dfa
->subexps_alloc
);
2277 if (BE (new_array
== NULL
, 0))
2279 dfa
->subexps_alloc
/= 2;
2283 dfa
->subexps
= new_array
;
2285 dfa
->subexps
[cur_nsub
].start
= dfa
->nodes_len
;
2286 dfa
->subexps
[cur_nsub
].end
= -1;
2288 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2289 left_par
= create_tree (NULL
, NULL
, 0, new_idx
);
2290 if (BE (new_idx
== -1 || left_par
== NULL
, 0))
2295 dfa
->nodes
[new_idx
].opr
.idx
= cur_nsub
;
2296 *token
= fetch_token (regexp
, syntax
| RE_CARET_ANCHORS_HERE
);
2298 /* The subexpression may be a null string. */
2299 if (token
->type
== OP_CLOSE_SUBEXP
)
2303 tree
= parse_reg_exp (regexp
, preg
, token
, syntax
, nest
, err
);
2304 if (BE (*err
!= REG_NOERROR
&& tree
== NULL
, 0))
2307 if (BE (token
->type
!= OP_CLOSE_SUBEXP
, 0))
2309 free_bin_tree (tree
);
2313 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2314 dfa
->subexps
[cur_nsub
].end
= dfa
->nodes_len
;
2315 right_par
= create_tree (NULL
, NULL
, 0, new_idx
);
2316 tree
= ((tree
== NULL
) ? right_par
2317 : create_tree (tree
, right_par
, CONCAT
, 0));
2318 tree
= create_tree (left_par
, tree
, CONCAT
, 0);
2319 if (BE (new_idx
== -1 || right_par
== NULL
|| tree
== NULL
, 0))
2324 dfa
->nodes
[new_idx
].opr
.idx
= cur_nsub
;
2329 /* This function parse repetition operators like "*", "+", "{1,3}" etc. */
2332 parse_dup_op (dup_elem
, regexp
, dfa
, token
, syntax
, err
)
2333 bin_tree_t
*dup_elem
;
2334 re_string_t
*regexp
;
2337 reg_syntax_t syntax
;
2340 re_token_t dup_token
;
2341 bin_tree_t
*tree
= dup_elem
, *work_tree
;
2342 int new_idx
, start_idx
= re_string_cur_idx (regexp
);
2343 re_token_t start_token
= *token
;
2344 if (token
->type
== OP_OPEN_DUP_NUM
)
2348 int start
= fetch_number (regexp
, token
, syntax
);
2352 if (token
->type
== CHARACTER
&& token
->opr
.c
== ',')
2353 start
= 0; /* We treat "{,m}" as "{0,m}". */
2356 *err
= REG_BADBR
; /* <re>{} is invalid. */
2360 if (BE (start
!= -2, 1))
2362 /* We treat "{n}" as "{n,n}". */
2363 end
= ((token
->type
== OP_CLOSE_DUP_NUM
) ? start
2364 : ((token
->type
== CHARACTER
&& token
->opr
.c
== ',')
2365 ? fetch_number (regexp
, token
, syntax
) : -2));
2367 if (BE (start
== -2 || end
== -2, 0))
2369 /* Invalid sequence. */
2370 if (token
->type
== OP_CLOSE_DUP_NUM
)
2371 goto parse_dup_op_invalid_interval
;
2373 goto parse_dup_op_ebrace
;
2375 if (BE (start
== 0 && end
== 0, 0))
2377 /* We treat "<re>{0}" and "<re>{0,0}" as null string. */
2378 *token
= fetch_token (regexp
, syntax
);
2379 free_bin_tree (dup_elem
);
2383 /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
2385 for (i
= 0; i
< start
; ++i
)
2388 work_tree
= duplicate_tree (elem
, dfa
);
2389 tree
= create_tree (tree
, work_tree
, CONCAT
, 0);
2390 if (BE (work_tree
== NULL
|| tree
== NULL
, 0))
2391 goto parse_dup_op_espace
;
2396 /* We treat "<re>{0,}" as "<re>*". */
2397 dup_token
.type
= OP_DUP_ASTERISK
;
2400 elem
= duplicate_tree (elem
, dfa
);
2401 new_idx
= re_dfa_add_node (dfa
, dup_token
, 0);
2402 work_tree
= create_tree (elem
, NULL
, 0, new_idx
);
2403 tree
= create_tree (tree
, work_tree
, CONCAT
, 0);
2404 if (BE (elem
== NULL
|| new_idx
== -1 || work_tree
== NULL
2405 || tree
== NULL
, 0))
2406 goto parse_dup_op_espace
;
2410 new_idx
= re_dfa_add_node (dfa
, dup_token
, 0);
2411 tree
= create_tree (elem
, NULL
, 0, new_idx
);
2412 if (BE (new_idx
== -1 || tree
== NULL
, 0))
2413 goto parse_dup_op_espace
;
2416 else if (BE (start
> end
, 0))
2418 /* First number greater than first. */
2422 else if (end
- start
> 0)
2424 /* Then extract "<re>{0,m}" to "<re>?<re>?...<re>?". */
2425 dup_token
.type
= OP_DUP_QUESTION
;
2428 elem
= duplicate_tree (elem
, dfa
);
2429 new_idx
= re_dfa_add_node (dfa
, dup_token
, 0);
2430 elem
= create_tree (elem
, NULL
, 0, new_idx
);
2431 tree
= create_tree (tree
, elem
, CONCAT
, 0);
2432 if (BE (elem
== NULL
|| new_idx
== -1 || tree
== NULL
, 0))
2433 goto parse_dup_op_espace
;
2437 new_idx
= re_dfa_add_node (dfa
, dup_token
, 0);
2438 tree
= elem
= create_tree (elem
, NULL
, 0, new_idx
);
2439 if (BE (new_idx
== -1 || tree
== NULL
, 0))
2440 goto parse_dup_op_espace
;
2442 for (i
= 1; i
< end
- start
; ++i
)
2444 work_tree
= duplicate_tree (elem
, dfa
);
2445 tree
= create_tree (tree
, work_tree
, CONCAT
, 0);
2446 if (BE (work_tree
== NULL
|| tree
== NULL
, 0))
2456 new_idx
= re_dfa_add_node (dfa
, *token
, 0);
2457 tree
= create_tree (tree
, NULL
, 0, new_idx
);
2458 if (BE (new_idx
== -1 || tree
== NULL
, 0))
2464 *token
= fetch_token (regexp
, syntax
);
2467 parse_dup_op_espace
:
2468 free_bin_tree (tree
);
2472 parse_dup_op_ebrace
:
2473 if (BE (!(syntax
& RE_INVALID_INTERVAL_ORD
), 0))
2478 goto parse_dup_op_rollback
;
2479 parse_dup_op_invalid_interval
:
2480 if (BE (!(syntax
& RE_INVALID_INTERVAL_ORD
), 0))
2485 parse_dup_op_rollback
:
2486 re_string_set_index (regexp
, start_idx
);
2487 *token
= start_token
;
2488 token
->type
= CHARACTER
;
2492 /* Size of the names for collating symbol/equivalence_class/character_class.
2493 I'm not sure, but maybe enough. */
2494 #define BRACKET_NAME_BUF_SIZE 32
2497 /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
2498 Build the range expression which starts from START_ELEM, and ends
2499 at END_ELEM. The result are written to MBCSET and SBCSET.
2500 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2501 mbcset->range_ends, is a pointer argument sinse we may
2504 static reg_errcode_t
2505 # ifdef RE_ENABLE_I18N
2506 build_range_exp (sbcset
, mbcset
, range_alloc
, start_elem
, end_elem
)
2507 re_charset_t
*mbcset
;
2509 # else /* not RE_ENABLE_I18N */
2510 build_range_exp (sbcset
, start_elem
, end_elem
)
2511 # endif /* not RE_ENABLE_I18N */
2512 re_bitset_ptr_t sbcset
;
2513 bracket_elem_t
*start_elem
, *end_elem
;
2515 unsigned int start_ch
, end_ch
;
2516 /* Equivalence Classes and Character Classes can't be a range start/end. */
2517 if (BE (start_elem
->type
== EQUIV_CLASS
|| start_elem
->type
== CHAR_CLASS
2518 || end_elem
->type
== EQUIV_CLASS
|| end_elem
->type
== CHAR_CLASS
,
2522 /* We can handle no multi character collating elements without libc
2524 if (BE ((start_elem
->type
== COLL_SYM
2525 && strlen ((char *) start_elem
->opr
.name
) > 1)
2526 || (end_elem
->type
== COLL_SYM
2527 && strlen ((char *) end_elem
->opr
.name
) > 1), 0))
2528 return REG_ECOLLATE
;
2530 # ifdef RE_ENABLE_I18N
2532 wchar_t wc
, start_wc
, end_wc
;
2533 wchar_t cmp_buf
[6] = {L
'\0', L
'\0', L
'\0', L
'\0', L
'\0', L
'\0'};
2535 start_ch
= ((start_elem
->type
== SB_CHAR
) ? start_elem
->opr
.ch
2536 : ((start_elem
->type
== COLL_SYM
) ? start_elem
->opr
.name
[0]
2538 end_ch
= ((end_elem
->type
== SB_CHAR
) ? end_elem
->opr
.ch
2539 : ((end_elem
->type
== COLL_SYM
) ? end_elem
->opr
.name
[0]
2541 start_wc
= ((start_elem
->type
== SB_CHAR
|| start_elem
->type
== COLL_SYM
)
2542 ? __btowc (start_ch
) : start_elem
->opr
.wch
);
2543 end_wc
= ((end_elem
->type
== SB_CHAR
|| end_elem
->type
== COLL_SYM
)
2544 ? __btowc (end_ch
) : end_elem
->opr
.wch
);
2545 cmp_buf
[0] = start_wc
;
2546 cmp_buf
[4] = end_wc
;
2547 if (wcscoll (cmp_buf
, cmp_buf
+ 4) > 0)
2550 /* Check the space of the arrays. */
2551 if (*range_alloc
== mbcset
->nranges
)
2553 /* There are not enough space, need realloc. */
2554 wchar_t *new_array_start
, *new_array_end
;
2557 /* +1 in case of mbcset->nranges is 0. */
2558 new_nranges
= 2 * mbcset
->nranges
+ 1;
2559 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2560 are NULL if *range_alloc == 0. */
2561 new_array_start
= re_realloc (mbcset
->range_starts
, wchar_t,
2563 new_array_end
= re_realloc (mbcset
->range_ends
, wchar_t,
2566 if (BE (new_array_start
== NULL
|| new_array_end
== NULL
, 0))
2569 mbcset
->range_starts
= new_array_start
;
2570 mbcset
->range_ends
= new_array_end
;
2571 *range_alloc
= new_nranges
;
2574 mbcset
->range_starts
[mbcset
->nranges
] = start_wc
;
2575 mbcset
->range_ends
[mbcset
->nranges
++] = end_wc
;
2577 /* Build the table for single byte characters. */
2578 for (wc
= 0; wc
<= SBC_MAX
; ++wc
)
2581 if (wcscoll (cmp_buf
, cmp_buf
+ 2) <= 0
2582 && wcscoll (cmp_buf
+ 2, cmp_buf
+ 4) <= 0)
2583 bitset_set (sbcset
, wc
);
2586 # else /* not RE_ENABLE_I18N */
2589 start_ch
= ((start_elem
->type
== SB_CHAR
) ? start_elem
->opr
.ch
2590 : ((start_elem
->type
== COLL_SYM
) ? start_elem
->opr
.name
[0]
2592 end_ch
= ((end_elem
->type
== SB_CHAR
) ? end_elem
->opr
.ch
2593 : ((end_elem
->type
== COLL_SYM
) ? end_elem
->opr
.name
[0]
2595 if (start_ch
> end_ch
)
2597 /* Build the table for single byte characters. */
2598 for (ch
= 0; ch
<= SBC_MAX
; ++ch
)
2599 if (start_ch
<= ch
&& ch
<= end_ch
)
2600 bitset_set (sbcset
, ch
);
2602 # endif /* not RE_ENABLE_I18N */
2605 #endif /* not _LIBC */
2608 /* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
2609 Build the collating element which is represented by NAME.
2610 The result are written to MBCSET and SBCSET.
2611 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2612 pointer argument since we may update it. */
2614 static reg_errcode_t
2615 # ifdef RE_ENABLE_I18N
2616 build_collating_symbol (sbcset
, mbcset
, coll_sym_alloc
, name
)
2617 re_charset_t
*mbcset
;
2618 int *coll_sym_alloc
;
2619 # else /* not RE_ENABLE_I18N */
2620 build_collating_symbol (sbcset
, name
)
2621 # endif /* not RE_ENABLE_I18N */
2622 re_bitset_ptr_t sbcset
;
2623 const unsigned char *name
;
2625 size_t name_len
= strlen ((const char *) name
);
2626 if (BE (name_len
!= 1, 0))
2627 return REG_ECOLLATE
;
2630 bitset_set (sbcset
, name
[0]);
2634 #endif /* not _LIBC */
2636 /* This function parse bracket expression like "[abc]", "[a-c]",
2640 parse_bracket_exp (regexp
, dfa
, token
, syntax
, err
)
2641 re_string_t
*regexp
;
2644 reg_syntax_t syntax
;
2648 const unsigned char *collseqmb
;
2649 const char *collseqwc
;
2652 const int32_t *symb_table
;
2653 const unsigned char *extra
;
2655 /* Local function for parse_bracket_exp used in _LIBC environement.
2656 Seek the collating symbol entry correspondings to NAME.
2657 Return the index of the symbol in the SYMB_TABLE. */
2659 static inline int32_t
2660 __attribute ((always_inline
))
2661 seek_collating_symbol_entry (name
, name_len
)
2662 const unsigned char *name
;
2665 int32_t hash
= elem_hash ((const char *) name
, name_len
);
2666 int32_t elem
= hash
% table_size
;
2667 int32_t second
= hash
% (table_size
- 2);
2668 while (symb_table
[2 * elem
] != 0)
2670 /* First compare the hashing value. */
2671 if (symb_table
[2 * elem
] == hash
2672 /* Compare the length of the name. */
2673 && name_len
== extra
[symb_table
[2 * elem
+ 1]]
2674 /* Compare the name. */
2675 && memcmp (name
, &extra
[symb_table
[2 * elem
+ 1] + 1],
2678 /* Yep, this is the entry. */
2688 /* Local function for parse_bracket_exp used in _LIBC environement.
2689 Look up the collation sequence value of BR_ELEM.
2690 Return the value if succeeded, UINT_MAX otherwise. */
2692 static inline unsigned int
2693 __attribute ((always_inline
))
2694 lookup_collation_sequence_value (br_elem
)
2695 bracket_elem_t
*br_elem
;
2697 if (br_elem
->type
== SB_CHAR
)
2700 if (MB_CUR_MAX == 1)
2703 return collseqmb
[br_elem
->opr
.ch
];
2706 wint_t wc
= __btowc (br_elem
->opr
.ch
);
2707 return __collseq_table_lookup (collseqwc
, wc
);
2710 else if (br_elem
->type
== MB_CHAR
)
2712 return __collseq_table_lookup (collseqwc
, br_elem
->opr
.wch
);
2714 else if (br_elem
->type
== COLL_SYM
)
2716 size_t sym_name_len
= strlen ((char *) br_elem
->opr
.name
);
2720 elem
= seek_collating_symbol_entry (br_elem
->opr
.name
,
2722 if (symb_table
[2 * elem
] != 0)
2724 /* We found the entry. */
2725 idx
= symb_table
[2 * elem
+ 1];
2726 /* Skip the name of collating element name. */
2727 idx
+= 1 + extra
[idx
];
2728 /* Skip the byte sequence of the collating element. */
2729 idx
+= 1 + extra
[idx
];
2730 /* Adjust for the alignment. */
2731 idx
= (idx
+ 3) & ~3;
2732 /* Skip the multibyte collation sequence value. */
2733 idx
+= sizeof (unsigned int);
2734 /* Skip the wide char sequence of the collating element. */
2735 idx
+= sizeof (unsigned int) *
2736 (1 + *(unsigned int *) (extra
+ idx
));
2737 /* Return the collation sequence value. */
2738 return *(unsigned int *) (extra
+ idx
);
2740 else if (symb_table
[2 * elem
] == 0 && sym_name_len
== 1)
2742 /* No valid character. Match it as a single byte
2744 return collseqmb
[br_elem
->opr
.name
[0]];
2747 else if (sym_name_len
== 1)
2748 return collseqmb
[br_elem
->opr
.name
[0]];
2753 /* Local function for parse_bracket_exp used in _LIBC environement.
2754 Build the range expression which starts from START_ELEM, and ends
2755 at END_ELEM. The result are written to MBCSET and SBCSET.
2756 RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2757 mbcset->range_ends, is a pointer argument sinse we may
2760 static inline reg_errcode_t
2761 __attribute ((always_inline
))
2762 # ifdef RE_ENABLE_I18N
2763 build_range_exp (sbcset
, mbcset
, range_alloc
, start_elem
, end_elem
)
2764 re_charset_t
*mbcset
;
2766 # else /* not RE_ENABLE_I18N */
2767 build_range_exp (sbcset
, start_elem
, end_elem
)
2768 # endif /* not RE_ENABLE_I18N */
2769 re_bitset_ptr_t sbcset
;
2770 bracket_elem_t
*start_elem
, *end_elem
;
2773 uint32_t start_collseq
;
2774 uint32_t end_collseq
;
2776 # ifdef RE_ENABLE_I18N
2777 /* Check the space of the arrays. */
2778 if (*range_alloc
== mbcset
->nranges
)
2780 /* There are not enough space, need realloc. */
2781 uint32_t *new_array_start
;
2782 uint32_t *new_array_end
;
2785 /* +1 in case of mbcset->nranges is 0. */
2786 new_nranges
= 2 * mbcset
->nranges
+ 1;
2787 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2788 are NULL if *range_alloc == 0. */
2789 new_array_start
= re_realloc (mbcset
->range_starts
, uint32_t,
2791 new_array_end
= re_realloc (mbcset
->range_ends
, uint32_t,
2794 if (BE (new_array_start
== NULL
|| new_array_end
== NULL
, 0))
2797 mbcset
->range_starts
= new_array_start
;
2798 mbcset
->range_ends
= new_array_end
;
2799 *range_alloc
= new_nranges
;
2801 # endif /* RE_ENABLE_I18N */
2803 /* Equivalence Classes and Character Classes can't be a range
2805 if (BE (start_elem
->type
== EQUIV_CLASS
|| start_elem
->type
== CHAR_CLASS
2806 || end_elem
->type
== EQUIV_CLASS
|| end_elem
->type
== CHAR_CLASS
,
2810 start_collseq
= lookup_collation_sequence_value (start_elem
);
2811 end_collseq
= lookup_collation_sequence_value (end_elem
);
2812 /* Check start/end collation sequence values. */
2813 if (BE (start_collseq
== UINT_MAX
|| end_collseq
== UINT_MAX
, 0))
2814 return REG_ECOLLATE
;
2815 if (BE ((syntax
& RE_NO_EMPTY_RANGES
) && start_collseq
> end_collseq
, 0))
2818 # ifdef RE_ENABLE_I18N
2819 /* Got valid collation sequence values, add them as a new entry. */
2820 mbcset
->range_starts
[mbcset
->nranges
] = start_collseq
;
2821 mbcset
->range_ends
[mbcset
->nranges
++] = end_collseq
;
2822 # endif /* RE_ENABLE_I18N */
2824 /* Build the table for single byte characters. */
2825 for (ch
= 0; ch
<= SBC_MAX
; ch
++)
2827 uint32_t ch_collseq
;
2829 if (MB_CUR_MAX == 1)
2832 ch_collseq
= collseqmb
[ch
];
2834 ch_collseq
= __collseq_table_lookup (collseqwc
, __btowc (ch
));
2835 if (start_collseq
<= ch_collseq
&& ch_collseq
<= end_collseq
)
2836 bitset_set (sbcset
, ch
);
2841 /* Local function for parse_bracket_exp used in _LIBC environement.
2842 Build the collating element which is represented by NAME.
2843 The result are written to MBCSET and SBCSET.
2844 COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2845 pointer argument sinse we may update it. */
2847 static inline reg_errcode_t
2848 __attribute ((always_inline
))
2849 # ifdef RE_ENABLE_I18N
2850 build_collating_symbol (sbcset
, mbcset
, coll_sym_alloc
, name
)
2851 re_charset_t
*mbcset
;
2852 int *coll_sym_alloc
;
2853 # else /* not RE_ENABLE_I18N */
2854 build_collating_symbol (sbcset
, name
)
2855 # endif /* not RE_ENABLE_I18N */
2856 re_bitset_ptr_t sbcset
;
2857 const unsigned char *name
;
2860 size_t name_len
= strlen ((const char *) name
);
2863 elem
= seek_collating_symbol_entry (name
, name_len
);
2864 if (symb_table
[2 * elem
] != 0)
2866 /* We found the entry. */
2867 idx
= symb_table
[2 * elem
+ 1];
2868 /* Skip the name of collating element name. */
2869 idx
+= 1 + extra
[idx
];
2871 else if (symb_table
[2 * elem
] == 0 && name_len
== 1)
2873 /* No valid character, treat it as a normal
2875 bitset_set (sbcset
, name
[0]);
2879 return REG_ECOLLATE
;
2881 # ifdef RE_ENABLE_I18N
2882 /* Got valid collation sequence, add it as a new entry. */
2883 /* Check the space of the arrays. */
2884 if (*coll_sym_alloc
== mbcset
->ncoll_syms
)
2886 /* Not enough, realloc it. */
2887 /* +1 in case of mbcset->ncoll_syms is 0. */
2888 *coll_sym_alloc
= 2 * mbcset
->ncoll_syms
+ 1;
2889 /* Use realloc since mbcset->coll_syms is NULL
2891 mbcset
->coll_syms
= re_realloc (mbcset
->coll_syms
, int32_t,
2893 if (BE (mbcset
->coll_syms
== NULL
, 0))
2896 mbcset
->coll_syms
[mbcset
->ncoll_syms
++] = idx
;
2897 # endif /* RE_ENABLE_I18N */
2902 if (BE (name_len
!= 1, 0))
2903 return REG_ECOLLATE
;
2906 bitset_set (sbcset
, name
[0]);
2913 re_token_t br_token
;
2914 re_bitset_ptr_t sbcset
;
2915 #ifdef RE_ENABLE_I18N
2916 re_charset_t
*mbcset
;
2917 int coll_sym_alloc
= 0, range_alloc
= 0, mbchar_alloc
= 0;
2918 int equiv_class_alloc
= 0, char_class_alloc
= 0;
2919 #else /* not RE_ENABLE_I18N */
2921 #endif /* not RE_ENABLE_I18N */
2922 bin_tree_t
*work_tree
;
2923 int token_len
, new_idx
;
2925 collseqmb
= (const unsigned char *)
2926 _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_COLLSEQMB
);
2927 nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
2933 collseqwc
= _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_COLLSEQWC
);
2934 table_size
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_SYMB_HASH_SIZEMB
);
2935 symb_table
= (const int32_t *) _NL_CURRENT (LC_COLLATE
,
2936 _NL_COLLATE_SYMB_TABLEMB
);
2937 extra
= (const unsigned char *) _NL_CURRENT (LC_COLLATE
,
2938 _NL_COLLATE_SYMB_EXTRAMB
);
2941 sbcset
= (re_bitset_ptr_t
) calloc (sizeof (unsigned int), BITSET_UINTS
);
2942 #ifdef RE_ENABLE_I18N
2943 mbcset
= (re_charset_t
*) calloc (sizeof (re_charset_t
), 1);
2944 #endif /* RE_ENABLE_I18N */
2945 #ifdef RE_ENABLE_I18N
2946 if (BE (sbcset
== NULL
|| mbcset
== NULL
, 0))
2948 if (BE (sbcset
== NULL
, 0))
2949 #endif /* RE_ENABLE_I18N */
2955 token_len
= peek_token_bracket (token
, regexp
, syntax
);
2956 if (BE (token
->type
== END_OF_RE
, 0))
2959 goto parse_bracket_exp_free_return
;
2961 if (token
->type
== OP_NON_MATCH_LIST
)
2963 #ifdef RE_ENABLE_I18N
2965 mbcset
->non_match
= 1;
2966 #else /* not RE_ENABLE_I18N */
2968 #endif /* not RE_ENABLE_I18N */
2969 if (syntax
& RE_HAT_LISTS_NOT_NEWLINE
)
2970 bitset_set (sbcset
, '\0');
2971 re_string_skip_bytes (regexp
, token_len
); /* Skip a token. */
2972 token_len
= peek_token_bracket (token
, regexp
, syntax
);
2973 if (BE (token
->type
== END_OF_RE
, 0))
2976 goto parse_bracket_exp_free_return
;
2978 #ifdef RE_ENABLE_I18N
2979 if (dfa
->mb_cur_max
> 1)
2980 for (i
= 0; i
< SBC_MAX
; ++i
)
2981 if (__btowc (i
) == WEOF
)
2982 bitset_set (sbcset
, i
);
2983 #endif /* RE_ENABLE_I18N */
2986 /* We treat the first ']' as a normal character. */
2987 if (token
->type
== OP_CLOSE_BRACKET
)
2988 token
->type
= CHARACTER
;
2990 int first_round
= 1;
2993 bracket_elem_t start_elem
, end_elem
;
2994 unsigned char start_name_buf
[BRACKET_NAME_BUF_SIZE
];
2995 unsigned char end_name_buf
[BRACKET_NAME_BUF_SIZE
];
2997 int token_len2
= 0, is_range_exp
= 0;
3000 start_elem
.opr
.name
= start_name_buf
;
3001 ret
= parse_bracket_element (&start_elem
, regexp
, token
, token_len
, dfa
,
3002 syntax
, first_round
);
3003 if (BE (ret
!= REG_NOERROR
, 0))
3006 goto parse_bracket_exp_free_return
;
3010 /* Get information about the next token. We need it in any case. */
3011 token_len
= peek_token_bracket (token
, regexp
, syntax
);
3013 /* Do not check for ranges if we know they are not allowed. */
3014 if (start_elem
.type
!= CHAR_CLASS
&& start_elem
.type
!= EQUIV_CLASS
)
3016 if (BE (token
->type
== END_OF_RE
, 0))
3019 goto parse_bracket_exp_free_return
;
3021 if (token
->type
== OP_CHARSET_RANGE
)
3023 re_string_skip_bytes (regexp
, token_len
); /* Skip '-'. */
3024 token_len2
= peek_token_bracket (&token2
, regexp
, syntax
);
3025 if (BE (token2
.type
== END_OF_RE
, 0))
3028 goto parse_bracket_exp_free_return
;
3030 if (token2
.type
== OP_CLOSE_BRACKET
)
3032 /* We treat the last '-' as a normal character. */
3033 re_string_skip_bytes (regexp
, -token_len
);
3034 token
->type
= CHARACTER
;
3041 if (is_range_exp
== 1)
3043 end_elem
.opr
.name
= end_name_buf
;
3044 ret
= parse_bracket_element (&end_elem
, regexp
, &token2
, token_len2
,
3046 if (BE (ret
!= REG_NOERROR
, 0))
3049 goto parse_bracket_exp_free_return
;
3052 token_len
= peek_token_bracket (token
, regexp
, syntax
);
3054 *err
= build_range_exp (sbcset
,
3055 #ifdef RE_ENABLE_I18N
3056 mbcset
, &range_alloc
,
3057 #endif /* RE_ENABLE_I18N */
3058 &start_elem
, &end_elem
);
3059 if (BE (*err
!= REG_NOERROR
, 0))
3060 goto parse_bracket_exp_free_return
;
3064 switch (start_elem
.type
)
3067 bitset_set (sbcset
, start_elem
.opr
.ch
);
3069 #ifdef RE_ENABLE_I18N
3071 /* Check whether the array has enough space. */
3072 if (mbchar_alloc
== mbcset
->nmbchars
)
3074 /* Not enough, realloc it. */
3075 /* +1 in case of mbcset->nmbchars is 0. */
3076 mbchar_alloc
= 2 * mbcset
->nmbchars
+ 1;
3077 /* Use realloc since array is NULL if *alloc == 0. */
3078 mbcset
->mbchars
= re_realloc (mbcset
->mbchars
, wchar_t,
3080 if (BE (mbcset
->mbchars
== NULL
, 0))
3081 goto parse_bracket_exp_espace
;
3083 mbcset
->mbchars
[mbcset
->nmbchars
++] = start_elem
.opr
.wch
;
3085 #endif /* RE_ENABLE_I18N */
3087 *err
= build_equiv_class (sbcset
,
3088 #ifdef RE_ENABLE_I18N
3089 mbcset
, &equiv_class_alloc
,
3090 #endif /* RE_ENABLE_I18N */
3091 start_elem
.opr
.name
);
3092 if (BE (*err
!= REG_NOERROR
, 0))
3093 goto parse_bracket_exp_free_return
;
3096 *err
= build_collating_symbol (sbcset
,
3097 #ifdef RE_ENABLE_I18N
3098 mbcset
, &coll_sym_alloc
,
3099 #endif /* RE_ENABLE_I18N */
3100 start_elem
.opr
.name
);
3101 if (BE (*err
!= REG_NOERROR
, 0))
3102 goto parse_bracket_exp_free_return
;
3105 *err
= build_charclass (regexp
->trans
, sbcset
,
3106 #ifdef RE_ENABLE_I18N
3107 mbcset
, &char_class_alloc
,
3108 #endif /* RE_ENABLE_I18N */
3109 start_elem
.opr
.name
, syntax
);
3110 if (BE (*err
!= REG_NOERROR
, 0))
3111 goto parse_bracket_exp_free_return
;
3118 if (BE (token
->type
== END_OF_RE
, 0))
3121 goto parse_bracket_exp_free_return
;
3123 if (token
->type
== OP_CLOSE_BRACKET
)
3127 re_string_skip_bytes (regexp
, token_len
); /* Skip a token. */
3129 /* If it is non-matching list. */
3130 #ifdef RE_ENABLE_I18N
3131 if (mbcset
->non_match
)
3132 #else /* not RE_ENABLE_I18N */
3134 #endif /* not RE_ENABLE_I18N */
3135 bitset_not (sbcset
);
3137 /* Build a tree for simple bracket. */
3138 br_token
.type
= SIMPLE_BRACKET
;
3139 br_token
.opr
.sbcset
= sbcset
;
3140 new_idx
= re_dfa_add_node (dfa
, br_token
, 0);
3141 work_tree
= create_tree (NULL
, NULL
, 0, new_idx
);
3142 if (BE (new_idx
== -1 || work_tree
== NULL
, 0))
3143 goto parse_bracket_exp_espace
;
3145 #ifdef RE_ENABLE_I18N
3146 if (mbcset
->nmbchars
|| mbcset
->ncoll_syms
|| mbcset
->nequiv_classes
3147 || mbcset
->nranges
|| (dfa
->mb_cur_max
> 1 && (mbcset
->nchar_classes
3148 || mbcset
->non_match
)))
3150 re_token_t alt_token
;
3151 bin_tree_t
*mbc_tree
;
3152 /* Build a tree for complex bracket. */
3153 br_token
.type
= COMPLEX_BRACKET
;
3154 br_token
.opr
.mbcset
= mbcset
;
3155 dfa
->has_mb_node
= 1;
3156 new_idx
= re_dfa_add_node (dfa
, br_token
, 0);
3157 mbc_tree
= create_tree (NULL
, NULL
, 0, new_idx
);
3158 if (BE (new_idx
== -1 || mbc_tree
== NULL
, 0))
3159 goto parse_bracket_exp_espace
;
3160 /* Then join them by ALT node. */
3161 dfa
->has_plural_match
= 1;
3162 alt_token
.type
= OP_ALT
;
3163 new_idx
= re_dfa_add_node (dfa
, alt_token
, 0);
3164 work_tree
= create_tree (work_tree
, mbc_tree
, 0, new_idx
);
3165 if (BE (new_idx
!= -1 && mbc_tree
!= NULL
, 1))
3170 free_charset (mbcset
);
3173 #else /* not RE_ENABLE_I18N */
3175 #endif /* not RE_ENABLE_I18N */
3177 parse_bracket_exp_espace
:
3179 parse_bracket_exp_free_return
:
3181 #ifdef RE_ENABLE_I18N
3182 free_charset (mbcset
);
3183 #endif /* RE_ENABLE_I18N */
3187 /* Parse an element in the bracket expression. */
3189 static reg_errcode_t
3190 parse_bracket_element (elem
, regexp
, token
, token_len
, dfa
, syntax
,
3192 bracket_elem_t
*elem
;
3193 re_string_t
*regexp
;
3197 reg_syntax_t syntax
;
3200 #ifdef RE_ENABLE_I18N
3202 cur_char_size
= re_string_char_size_at (regexp
, re_string_cur_idx (regexp
));
3203 if (cur_char_size
> 1)
3205 elem
->type
= MB_CHAR
;
3206 elem
->opr
.wch
= re_string_wchar_at (regexp
, re_string_cur_idx (regexp
));
3207 re_string_skip_bytes (regexp
, cur_char_size
);
3210 #endif /* RE_ENABLE_I18N */
3211 re_string_skip_bytes (regexp
, token_len
); /* Skip a token. */
3212 if (token
->type
== OP_OPEN_COLL_ELEM
|| token
->type
== OP_OPEN_CHAR_CLASS
3213 || token
->type
== OP_OPEN_EQUIV_CLASS
)
3214 return parse_bracket_symbol (elem
, regexp
, token
);
3215 if (BE (token
->type
== OP_CHARSET_RANGE
, 0) && !accept_hyphen
)
3217 /* A '-' must only appear as anything but a range indicator before
3218 the closing bracket. Everything else is an error. */
3220 (void) peek_token_bracket (&token2
, regexp
, syntax
);
3221 if (token2
.type
!= OP_CLOSE_BRACKET
)
3222 /* The actual error value is not standardized since this whole
3223 case is undefined. But ERANGE makes good sense. */
3226 elem
->type
= SB_CHAR
;
3227 elem
->opr
.ch
= token
->opr
.c
;
3231 /* Parse a bracket symbol in the bracket expression. Bracket symbols are
3232 such as [:<character_class>:], [.<collating_element>.], and
3233 [=<equivalent_class>=]. */
3235 static reg_errcode_t
3236 parse_bracket_symbol (elem
, regexp
, token
)
3237 bracket_elem_t
*elem
;
3238 re_string_t
*regexp
;
3241 unsigned char ch
, delim
= token
->opr
.c
;
3245 if (re_string_eoi(regexp
) || i
>= BRACKET_NAME_BUF_SIZE
)
3247 if (token
->type
== OP_OPEN_CHAR_CLASS
)
3248 ch
= re_string_fetch_byte_case (regexp
);
3250 ch
= re_string_fetch_byte (regexp
);
3251 if (ch
== delim
&& re_string_peek_byte (regexp
, 0) == ']')
3253 elem
->opr
.name
[i
] = ch
;
3255 re_string_skip_bytes (regexp
, 1);
3256 elem
->opr
.name
[i
] = '\0';
3257 switch (token
->type
)
3259 case OP_OPEN_COLL_ELEM
:
3260 elem
->type
= COLL_SYM
;
3262 case OP_OPEN_EQUIV_CLASS
:
3263 elem
->type
= EQUIV_CLASS
;
3265 case OP_OPEN_CHAR_CLASS
:
3266 elem
->type
= CHAR_CLASS
;
3274 /* Helper function for parse_bracket_exp.
3275 Build the equivalence class which is represented by NAME.
3276 The result are written to MBCSET and SBCSET.
3277 EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3278 is a pointer argument sinse we may update it. */
3280 static reg_errcode_t
3281 #ifdef RE_ENABLE_I18N
3282 build_equiv_class (sbcset
, mbcset
, equiv_class_alloc
, name
)
3283 re_charset_t
*mbcset
;
3284 int *equiv_class_alloc
;
3285 #else /* not RE_ENABLE_I18N */
3286 build_equiv_class (sbcset
, name
)
3287 #endif /* not RE_ENABLE_I18N */
3288 re_bitset_ptr_t sbcset
;
3289 const unsigned char *name
;
3291 #if defined _LIBC && defined RE_ENABLE_I18N
3292 uint32_t nrules
= _NL_CURRENT_WORD (LC_COLLATE
, _NL_COLLATE_NRULES
);
3295 const int32_t *table
, *indirect
;
3296 const unsigned char *weights
, *extra
, *cp
;
3297 unsigned char char_buf
[2];
3301 /* This #include defines a local function! */
3302 # include <locale/weight.h>
3303 /* Calculate the index for equivalence class. */
3305 table
= (const int32_t *) _NL_CURRENT (LC_COLLATE
, _NL_COLLATE_TABLEMB
);
3306 weights
= (const unsigned char *) _NL_CURRENT (LC_COLLATE
,
3307 _NL_COLLATE_WEIGHTMB
);
3308 extra
= (const unsigned char *) _NL_CURRENT (LC_COLLATE
,
3309 _NL_COLLATE_EXTRAMB
);
3310 indirect
= (const int32_t *) _NL_CURRENT (LC_COLLATE
,
3311 _NL_COLLATE_INDIRECTMB
);
3312 idx1
= findidx (&cp
);
3313 if (BE (idx1
== 0 || cp
< name
+ strlen ((const char *) name
), 0))
3314 /* This isn't a valid character. */
3315 return REG_ECOLLATE
;
3317 /* Build single byte matcing table for this equivalence class. */
3318 char_buf
[1] = (unsigned char) '\0';
3319 len
= weights
[idx1
];
3320 for (ch
= 0; ch
< SBC_MAX
; ++ch
)
3324 idx2
= findidx (&cp
);
3329 /* This isn't a valid character. */
3331 if (len
== weights
[idx2
])
3334 while (cnt
<= len
&&
3335 weights
[idx1
+ 1 + cnt
] == weights
[idx2
+ 1 + cnt
])
3339 bitset_set (sbcset
, ch
);
3342 /* Check whether the array has enough space. */
3343 if (*equiv_class_alloc
== mbcset
->nequiv_classes
)
3345 /* Not enough, realloc it. */
3346 /* +1 in case of mbcset->nequiv_classes is 0. */
3347 *equiv_class_alloc
= 2 * mbcset
->nequiv_classes
+ 1;
3348 /* Use realloc since the array is NULL if *alloc == 0. */
3349 mbcset
->equiv_classes
= re_realloc (mbcset
->equiv_classes
, int32_t,
3350 *equiv_class_alloc
);
3351 if (BE (mbcset
->equiv_classes
== NULL
, 0))
3354 mbcset
->equiv_classes
[mbcset
->nequiv_classes
++] = idx1
;
3357 #endif /* _LIBC && RE_ENABLE_I18N */
3359 if (BE (strlen ((const char *) name
) != 1, 0))
3360 return REG_ECOLLATE
;
3361 bitset_set (sbcset
, *name
);
3366 /* Helper function for parse_bracket_exp.
3367 Build the character class which is represented by NAME.
3368 The result are written to MBCSET and SBCSET.
3369 CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3370 is a pointer argument sinse we may update it. */
3372 static reg_errcode_t
3373 #ifdef RE_ENABLE_I18N
3374 build_charclass (trans
, sbcset
, mbcset
, char_class_alloc
, class_name
, syntax
)
3375 re_charset_t
*mbcset
;
3376 int *char_class_alloc
;
3377 #else /* not RE_ENABLE_I18N */
3378 build_charclass (trans
, sbcset
, class_name
, syntax
)
3379 #endif /* not RE_ENABLE_I18N */
3380 RE_TRANSLATE_TYPE trans
;
3381 re_bitset_ptr_t sbcset
;
3382 const unsigned char *class_name
;
3383 reg_syntax_t syntax
;
3386 const char *name
= (const char *) class_name
;
3388 /* In case of REG_ICASE "upper" and "lower" match the both of
3389 upper and lower cases. */
3390 if ((syntax
& RE_ICASE
)
3391 && (strcmp (name
, "upper") == 0 || strcmp (name
, "lower") == 0))
3394 #ifdef RE_ENABLE_I18N
3395 /* Check the space of the arrays. */
3396 if (*char_class_alloc
== mbcset
->nchar_classes
)
3398 /* Not enough, realloc it. */
3399 /* +1 in case of mbcset->nchar_classes is 0. */
3400 *char_class_alloc
= 2 * mbcset
->nchar_classes
+ 1;
3401 /* Use realloc since array is NULL if *alloc == 0. */
3402 mbcset
->char_classes
= re_realloc (mbcset
->char_classes
, wctype_t,
3404 if (BE (mbcset
->char_classes
== NULL
, 0))
3407 mbcset
->char_classes
[mbcset
->nchar_classes
++] = __wctype (name
);
3408 #endif /* RE_ENABLE_I18N */
3410 #define BUILD_CHARCLASS_LOOP(ctype_func) \
3411 for (i = 0; i < SBC_MAX; ++i) \
3413 if (ctype_func (i)) \
3415 int ch = trans ? trans[i] : i; \
3416 bitset_set (sbcset, ch); \
3420 if (strcmp (name
, "alnum") == 0)
3421 BUILD_CHARCLASS_LOOP (isalnum
)
3422 else if (strcmp (name
, "cntrl") == 0)
3423 BUILD_CHARCLASS_LOOP (iscntrl
)
3424 else if (strcmp (name
, "lower") == 0)
3425 BUILD_CHARCLASS_LOOP (islower
)
3426 else if (strcmp (name
, "space") == 0)
3427 BUILD_CHARCLASS_LOOP (isspace
)
3428 else if (strcmp (name
, "alpha") == 0)
3429 BUILD_CHARCLASS_LOOP (isalpha
)
3430 else if (strcmp (name
, "digit") == 0)
3431 BUILD_CHARCLASS_LOOP (isdigit
)
3432 else if (strcmp (name
, "print") == 0)
3433 BUILD_CHARCLASS_LOOP (isprint
)
3434 else if (strcmp (name
, "upper") == 0)
3435 BUILD_CHARCLASS_LOOP (isupper
)
3436 else if (strcmp (name
, "blank") == 0)
3437 BUILD_CHARCLASS_LOOP (isblank
)
3438 else if (strcmp (name
, "graph") == 0)
3439 BUILD_CHARCLASS_LOOP (isgraph
)
3440 else if (strcmp (name
, "punct") == 0)
3441 BUILD_CHARCLASS_LOOP (ispunct
)
3442 else if (strcmp (name
, "xdigit") == 0)
3443 BUILD_CHARCLASS_LOOP (isxdigit
)
3451 build_charclass_op (dfa
, trans
, class_name
, extra
, not, err
)
3453 RE_TRANSLATE_TYPE trans
;
3454 const unsigned char *class_name
;
3455 const unsigned char *extra
;
3459 re_bitset_ptr_t sbcset
;
3460 #ifdef RE_ENABLE_I18N
3461 re_charset_t
*mbcset
;
3463 #else /* not RE_ENABLE_I18N */
3465 #endif /* not RE_ENABLE_I18N */
3467 re_token_t br_token
;
3471 sbcset
= (re_bitset_ptr_t
) calloc (sizeof (unsigned int), BITSET_UINTS
);
3472 #ifdef RE_ENABLE_I18N
3473 mbcset
= (re_charset_t
*) calloc (sizeof (re_charset_t
), 1);
3474 #endif /* RE_ENABLE_I18N */
3476 #ifdef RE_ENABLE_I18N
3477 if (BE (sbcset
== NULL
|| mbcset
== NULL
, 0))
3478 #else /* not RE_ENABLE_I18N */
3479 if (BE (sbcset
== NULL
, 0))
3480 #endif /* not RE_ENABLE_I18N */
3488 #ifdef RE_ENABLE_I18N
3491 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3492 bitset_set(cset->sbcset, '\0');
3494 mbcset
->non_match
= 1;
3495 if (dfa
->mb_cur_max
> 1)
3496 for (i
= 0; i
< SBC_MAX
; ++i
)
3497 if (__btowc (i
) == WEOF
)
3498 bitset_set (sbcset
, i
);
3499 #else /* not RE_ENABLE_I18N */
3501 #endif /* not RE_ENABLE_I18N */
3504 /* We don't care the syntax in this case. */
3505 ret
= build_charclass (trans
, sbcset
,
3506 #ifdef RE_ENABLE_I18N
3508 #endif /* RE_ENABLE_I18N */
3511 if (BE (ret
!= REG_NOERROR
, 0))
3514 #ifdef RE_ENABLE_I18N
3515 free_charset (mbcset
);
3516 #endif /* RE_ENABLE_I18N */
3520 /* \w match '_' also. */
3521 for (; *extra
; extra
++)
3522 bitset_set (sbcset
, *extra
);
3524 /* If it is non-matching list. */
3525 #ifdef RE_ENABLE_I18N
3526 if (mbcset
->non_match
)
3527 #else /* not RE_ENABLE_I18N */
3529 #endif /* not RE_ENABLE_I18N */
3530 bitset_not (sbcset
);
3532 /* Build a tree for simple bracket. */
3533 br_token
.type
= SIMPLE_BRACKET
;
3534 br_token
.opr
.sbcset
= sbcset
;
3535 new_idx
= re_dfa_add_node (dfa
, br_token
, 0);
3536 tree
= create_tree (NULL
, NULL
, 0, new_idx
);
3537 if (BE (new_idx
== -1 || tree
== NULL
, 0))
3538 goto build_word_op_espace
;
3540 #ifdef RE_ENABLE_I18N
3541 if (dfa
->mb_cur_max
> 1)
3543 re_token_t alt_token
;
3544 bin_tree_t
*mbc_tree
;
3545 /* Build a tree for complex bracket. */
3546 br_token
.type
= COMPLEX_BRACKET
;
3547 br_token
.opr
.mbcset
= mbcset
;
3548 dfa
->has_mb_node
= 1;
3549 new_idx
= re_dfa_add_node (dfa
, br_token
, 0);
3550 mbc_tree
= create_tree (NULL
, NULL
, 0, new_idx
);
3551 if (BE (new_idx
== -1 || mbc_tree
== NULL
, 0))
3552 goto build_word_op_espace
;
3553 /* Then join them by ALT node. */
3554 alt_token
.type
= OP_ALT
;
3555 new_idx
= re_dfa_add_node (dfa
, alt_token
, 0);
3556 tree
= create_tree (tree
, mbc_tree
, 0, new_idx
);
3557 if (BE (new_idx
!= -1 && mbc_tree
!= NULL
, 1))
3562 free_charset (mbcset
);
3565 #else /* not RE_ENABLE_I18N */
3567 #endif /* not RE_ENABLE_I18N */
3569 build_word_op_espace
:
3571 #ifdef RE_ENABLE_I18N
3572 free_charset (mbcset
);
3573 #endif /* RE_ENABLE_I18N */
3578 /* This is intended for the expressions like "a{1,3}".
3579 Fetch a number from `input', and return the number.
3580 Return -1, if the number field is empty like "{,1}".
3581 Return -2, If an error is occured. */
3584 fetch_number (input
, token
, syntax
)
3587 reg_syntax_t syntax
;
3593 *token
= fetch_token (input
, syntax
);
3595 if (BE (token
->type
== END_OF_RE
, 0))
3597 if (token
->type
== OP_CLOSE_DUP_NUM
|| c
== ',')
3599 num
= ((token
->type
!= CHARACTER
|| c
< '0' || '9' < c
|| num
== -2)
3600 ? -2 : ((num
== -1) ? c
- '0' : num
* 10 + c
- '0'));
3601 num
= (num
> RE_DUP_MAX
) ? -2 : num
;
3606 #ifdef RE_ENABLE_I18N
3608 free_charset (re_charset_t
*cset
)
3610 re_free (cset
->mbchars
);
3612 re_free (cset
->coll_syms
);
3613 re_free (cset
->equiv_classes
);
3614 re_free (cset
->range_starts
);
3615 re_free (cset
->range_ends
);
3617 re_free (cset
->char_classes
);
3620 #endif /* RE_ENABLE_I18N */
3622 /* Functions for binary tree operation. */
3624 /* Create a node of tree.
3625 Note: This function automatically free left and right if malloc fails. */
3628 create_tree (left
, right
, type
, index
)
3631 re_token_type_t type
;
3635 tree
= re_malloc (bin_tree_t
, 1);
3636 if (BE (tree
== NULL
, 0))
3638 free_bin_tree (left
);
3639 free_bin_tree (right
);
3642 tree
->parent
= NULL
;
3644 tree
->right
= right
;
3646 tree
->node_idx
= index
;
3649 re_node_set_init_empty (&tree
->eclosure
);
3652 left
->parent
= tree
;
3654 right
->parent
= tree
;
3658 /* Free the sub tree pointed by TREE. */
3661 free_bin_tree (tree
)
3666 /*re_node_set_free (&tree->eclosure);*/
3667 free_bin_tree (tree
->left
);
3668 free_bin_tree (tree
->right
);
3672 /* Duplicate the node SRC, and return new node. */
3675 duplicate_tree (src
, dfa
)
3676 const bin_tree_t
*src
;
3679 bin_tree_t
*left
= NULL
, *right
= NULL
, *new_tree
;
3681 /* Since node indies must be according to Post-order of the tree,
3682 we must duplicate the left at first. */
3683 if (src
->left
!= NULL
)
3685 left
= duplicate_tree (src
->left
, dfa
);
3690 /* Secondaly, duplicate the right. */
3691 if (src
->right
!= NULL
)
3693 right
= duplicate_tree (src
->right
, dfa
);
3696 free_bin_tree (left
);
3701 /* At last, duplicate itself. */
3702 if (src
->type
== NON_TYPE
)
3704 new_node_idx
= re_dfa_add_node (dfa
, dfa
->nodes
[src
->node_idx
], 0);
3705 dfa
->nodes
[new_node_idx
].duplicated
= 1;
3706 if (BE (new_node_idx
== -1, 0))
3708 free_bin_tree (left
);
3709 free_bin_tree (right
);
3714 new_node_idx
= src
->type
;
3716 new_tree
= create_tree (left
, right
, src
->type
, new_node_idx
);
3717 if (BE (new_tree
== NULL
, 0))
3719 free_bin_tree (left
);
3720 free_bin_tree (right
);