/* Extended regular expression matching and search library.
- Copyright (C) 2002-2014 Free Software Foundation, Inc.
+ Copyright (C) 2002-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
#include <stdint.h>
+#ifdef _LIBC
+# include <locale/weight.h>
+#endif
+
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
size_t length, reg_syntax_t syntax);
static void re_compile_fastmap_iter (regex_t *bufp,
compiles PATTERN (of length LENGTH) and puts the result in BUFP.
Returns 0 if the pattern was valid, otherwise an error string.
- Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
are set in BUFP on entry. */
const char *
weak_alias (__re_compile_pattern, re_compile_pattern)
#endif
-/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can
also be assigned to arbitrarily: each pattern buffer stores its own
syntax, so it can be changed between regex compilations. */
/* This has no initializer because initialized variables in Emacs
#endif
static inline void
-__attribute ((always_inline))
-re_set_fastmap (char *fastmap, int icase, int ch)
+__attribute__ ((always_inline))
+re_set_fastmap (char *fastmap, bool icase, int ch)
{
fastmap[ch] = 1;
if (icase)
memset (&state, '\0', sizeof (state));
if (__mbrtowc (&wc, (const char *) buf, p - buf,
&state) == p - buf
- && (__wcrtomb ((char *) buf, towlower (wc), &state)
+ && (__wcrtomb ((char *) buf, __towlower (wc), &state)
!= (size_t) -1))
re_set_fastmap (fastmap, 0, buf[0]);
}
re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
{
- if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+ if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state)
!= (size_t) -1)
re_set_fastmap (fastmap, false, *(unsigned char *) buf);
}
PREG is a regex_t *. We do not expect any fields to be initialized,
since POSIX says we shouldn't. Thus, we set
- `buffer' to the compiled pattern;
- `used' to the length of the compiled pattern;
- `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ 'buffer' to the compiled pattern;
+ 'used' to the length of the compiled pattern;
+ 'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
REG_EXTENDED bit in CFLAGS is set; otherwise, to
RE_SYNTAX_POSIX_BASIC;
- `newline_anchor' to REG_NEWLINE being set in CFLAGS;
- `fastmap' to an allocated space for the fastmap;
- `fastmap_accurate' to zero;
- `re_nsub' to the number of subexpressions in PATTERN.
+ 'newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ 'fastmap' to an allocated space for the fastmap;
+ 'fastmap_accurate' to zero;
+ 're_nsub' to the number of subexpressions in PATTERN.
PATTERN is the address of the pattern string.
+ __re_error_msgid_idx[(int) REG_ESPACE]);
}
- /* Since `re_exec' always passes NULL for the `regs' argument, we
+ /* Since 're_exec' always passes NULL for the 'regs' argument, we
don't need to initialize the pattern buffer fields which affect it. */
/* Match anchors at newlines. */
destination. */
org_dest = dfa->edests[org_node].elems[0];
re_node_set_empty (dfa->edests + clone_node);
- /* If the node is root_node itself, it means the epsilon clsoure
+ /* If the node is root_node itself, it means the epsilon closure
has a loop. Then tie it to the destination of the root_node. */
if (org_node == root_node && clone_node != org_node)
{
return REG_ESPACE;
break;
}
- /* In case of the node has another constraint, add it. */
+ /* In case the node has another constraint, append it. */
constraint |= dfa->nodes[org_node].constraint;
clone_dest = duplicate_node (dfa, org_dest, constraint);
if (BE (clone_dest == -1, 0))
/* If we have already calculated, skip it. */
if (dfa->eclosures[node_idx].nelem != 0)
continue;
- /* Calculate epsilon closure of `node_idx'. */
+ /* Calculate epsilon closure of 'node_idx'. */
err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
if (BE (err != REG_NOERROR, 0))
return err;
}
else
eclosure_elem = dfa->eclosures[edest];
- /* Merge the epsilon closure of `edest'. */
+ /* Merge the epsilon closure of 'edest'. */
err = re_node_set_merge (&eclosure, &eclosure_elem);
if (BE (err != REG_NOERROR, 0))
return err;
- /* If the epsilon closure of `edest' is incomplete,
+ /* If the epsilon closure of 'edest' is incomplete,
the epsilon closure of this node is also incomplete. */
if (dfa->eclosures[edest].nelem == 0)
{
/* Entry point of the parser.
Parse the regular expression REGEXP and return the structure tree.
- If an error is occured, ERR is set by error code, and return NULL.
+ If an error occurs, ERR is set by error code, and return NULL.
This function build the following tree, from regular expression <reg_exp>:
CAT
/ \
/ \
<branch1> <branch2>
- ALT means alternative, which represents the operator `|'. */
+ ALT means alternative, which represents the operator '|'. */
static bin_tree_t *
parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
{
branch = parse_branch (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && branch == NULL, 0))
- return NULL;
+ {
+ if (tree != NULL)
+ postorder (tree, free_tree, NULL);
+ return NULL;
+ }
}
else
branch = NULL;
/* Duplicate ELEM before it is marked optional. */
elem = duplicate_tree (elem, dfa);
+ if (BE (elem == NULL, 0))
+ goto parse_dup_op_espace;
old_tree = tree;
}
else
Build the range expression which starts from START_ELEM, and ends
at END_ELEM. The result are written to MBCSET and SBCSET.
RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument sinse we may
+ mbcset->range_ends, is a pointer argument since we may
update it. */
static reg_errcode_t
const int32_t *symb_table;
const unsigned char *extra;
- /* Local function for parse_bracket_exp used in _LIBC environement.
- Seek the collating symbol entry correspondings to NAME.
+ /* Local function for parse_bracket_exp used in _LIBC environment.
+ Seek the collating symbol entry corresponding to NAME.
Return the index of the symbol in the SYMB_TABLE,
or -1 if not found. */
auto inline int32_t
- __attribute ((always_inline))
+ __attribute__ ((always_inline))
seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
{
int32_t elem;
Return the value if succeeded, UINT_MAX otherwise. */
auto inline unsigned int
- __attribute ((always_inline))
+ __attribute__ ((always_inline))
lookup_collation_sequence_value (bracket_elem_t *br_elem)
{
if (br_elem->type == SB_CHAR)
return UINT_MAX;
}
- /* Local function for parse_bracket_exp used in _LIBC environement.
+ /* Local function for parse_bracket_exp used in _LIBC environment.
Build the range expression which starts from START_ELEM, and ends
at END_ELEM. The result are written to MBCSET and SBCSET.
RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument sinse we may
+ mbcset->range_ends, is a pointer argument since we may
update it. */
auto inline reg_errcode_t
- __attribute ((always_inline))
+ __attribute__ ((always_inline))
build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
bracket_elem_t *start_elem, bracket_elem_t *end_elem)
{
return REG_NOERROR;
}
- /* Local function for parse_bracket_exp used in _LIBC environement.
+ /* Local function for parse_bracket_exp used in _LIBC environment.
Build the collating element which is represented by NAME.
The result are written to MBCSET and SBCSET.
COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
- pointer argument sinse we may update it. */
+ pointer argument since we may update it. */
auto inline reg_errcode_t
- __attribute ((always_inline))
+ __attribute__ ((always_inline))
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
int *coll_sym_alloc, const unsigned char *name)
{
re_token_t token2;
start_elem.opr.name = start_name_buf;
+ start_elem.type = COLL_SYM;
ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
syntax, first_round);
if (BE (ret != REG_NOERROR, 0))
if (is_range_exp == 1)
{
end_elem.opr.name = end_name_buf;
+ end_elem.type = COLL_SYM;
ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
dfa, syntax, 1);
if (BE (ret != REG_NOERROR, 0))
Build the equivalence class which is represented by NAME.
The result are written to MBCSET and SBCSET.
EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
- is a pointer argument sinse we may update it. */
+ is a pointer argument since we may update it. */
static reg_errcode_t
#ifdef RE_ENABLE_I18N
int32_t idx1, idx2;
unsigned int ch;
size_t len;
- /* This #include defines a local function! */
-# include <locale/weight.h>
/* Calculate the index for equivalence class. */
cp = name;
table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
_NL_COLLATE_EXTRAMB);
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_INDIRECTMB);
- idx1 = findidx (&cp, -1);
+ idx1 = findidx (table, indirect, extra, &cp, -1);
if (BE (idx1 == 0 || *cp != '\0', 0))
/* This isn't a valid character. */
return REG_ECOLLATE;
{
char_buf[0] = ch;
cp = char_buf;
- idx2 = findidx (&cp, 1);
+ idx2 = findidx (table, indirect, extra, &cp, 1);
/*
idx2 = table[ch];
*/
Build the character class which is represented by NAME.
The result are written to MBCSET and SBCSET.
CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
- is a pointer argument sinse we may update it. */
+ is a pointer argument since we may update it. */
static reg_errcode_t
#ifdef RE_ENABLE_I18N