From: Michael Tremer Date: Mon, 20 Aug 2012 09:38:48 +0000 (+0200) Subject: grep: Update to 2.13. X-Git-Tag: v2.13-beta1~231 X-Git-Url: http://git.ipfire.org/?p=ipfire-2.x.git;a=commitdiff_plain;h=998334b028478be01aa37437fa510ef30be00d10 grep: Update to 2.13. --- diff --git a/config/rootfiles/common/grep b/config/rootfiles/common/grep index 5d5db68a12..a00737d4da 100644 --- a/config/rootfiles/common/grep +++ b/config/rootfiles/common/grep @@ -1,7 +1,7 @@ bin/egrep bin/fgrep bin/grep -#usr/info/grep.info -#usr/man/man1/egrep.1 -#usr/man/man1/fgrep.1 -#usr/man/man1/grep.1 +#usr/share/info/grep.info +#usr/share/man/man1/egrep.1 +#usr/share/man/man1/fgrep.1 +#usr/share/man/man1/grep.1 diff --git a/lfs/grep b/lfs/grep index 7d62f8b8df..0514ccb384 100644 --- a/lfs/grep +++ b/lfs/grep @@ -24,10 +24,10 @@ include Config -VER = 2.5.1a +VER = 2.13 THISAPP = grep-$(VER) -DL_FILE = $(THISAPP).tar.bz2 +DL_FILE = $(THISAPP).tar.xz DL_FROM = $(URL_IPFIRE) DIR_APP = $(DIR_SRC)/$(THISAPP) @@ -54,7 +54,7 @@ objects = $(DL_FILE) $(DL_FILE) = $(DL_FROM)/$(DL_FILE) -$(DL_FILE)_MD5 = 52202fe462770fa6be1bb667bd6cf30c +$(DL_FILE)_MD5 = 5894d484e6c02249f9702d0d8a472115 install : $(TARGET) @@ -83,11 +83,7 @@ $(subst %,%_MD5,$(objects)) : $(TARGET) : $(patsubst %,$(DIR_DL)/%,$(objects)) @$(PREBUILD) - @rm -rf $(DIR_APP) && cd $(DIR_SRC) && tar jxf $(DIR_DL)/$(DL_FILE) -ifeq "$(ROOT)" "" - cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/$(THISAPP)-redhat_fixes-2.patch - cd $(DIR_APP) && chmod +x tests/fmbtest.sh -endif + @rm -rf $(DIR_APP) && cd $(DIR_SRC) && tar axf $(DIR_DL)/$(DL_FILE) cd $(DIR_APP) && ./configure $(EXTRA_CONFIG) cd $(DIR_APP) && make $(MAKETUNING) $(EXTRA_MAKE) cd $(DIR_APP) && make $(EXTRA_INSTALL) install diff --git a/src/patches/grep-2.5.1a-redhat_fixes-2.patch b/src/patches/grep-2.5.1a-redhat_fixes-2.patch deleted file mode 100644 index 9c30342299..0000000000 --- a/src/patches/grep-2.5.1a-redhat_fixes-2.patch +++ /dev/null @@ -1,2109 +0,0 @@ -Submitted by: Alexander E. Patrakov -Date: 2005-08-13 -Initial Package Version: 2.5.1a -Upstream Status: Partially accepted, partially rejected, but required for LSB >= 2.0 certification -Origin: RedHat -Description: Various fixes from RedHat. Individual patches: - - grep-2.5.1-fgrep.patch - grep-2.5.1-bracket.patch - grep-2.5-i18n.patch - grep-2.5.1-oi.patch - grep-2.5.1-manpage.patch - grep-2.5.1-color.patch - grep-2.5.1-icolor.patch - grep-2.5.1-egf-speedup.patch - grep-2.5.1-dfa-optional.patch - grep-2.5.1-tests.patch - grep-2.5.1-w.patch - -Testcases: - - -fgrep: ???, but required for other patches - -bracket: echo "[" | LANG=en_US.UTF-8 grep "[[:space:]]" - -i18n: many fixes for multibyte locale support, required for LSB. - -oi: echo xxYYzz | LANG=C grep -i -o yy - -manpage: typo - -color: restore the background color correctly - -icolor: ??? echo 'spam foo SPAM FOO' | grep -i --color spam - (but that's also fixed by -oi. Is this patch just a cleanup?) - -egf-speedup: without this, grep is as slow as a snail in UTF-8 locales. - -dfa-optional: disables dfa in multibyte locales by default. - -w: (echo 'foo';echo 'fo') > /tmp/testfile && grep -F -w fo /tmp/testfile - -diff -urN grep-2.5.1a.orig/doc/grep.1 grep-2.5.1a/doc/grep.1 ---- grep-2.5.1a.orig/doc/grep.1 2004-11-12 16:26:37.000000000 +0500 -+++ grep-2.5.1a/doc/grep.1 2005-10-23 09:49:43.000000000 +0600 -@@ -191,6 +191,7 @@ - .I PATTERN - as a list of fixed strings, separated by newlines, - any of which is to be matched. -+.TP - .BR \-P ", " \-\^\-perl-regexp - Interpret - .I PATTERN -@@ -302,7 +303,7 @@ - This is especially useful for tools like zgrep, e.g. - .B "gzip -cd foo.gz |grep --label=foo something" - .TP --.BR \-\^\-line-buffering -+.BR \-\^\-line-buffered - Use line buffering, it can be a performance penality. - .TP - .BR \-q ", " \-\^\-quiet ", " \-\^\-silent -diff -urN grep-2.5.1a.orig/lib/posix/regex.h grep-2.5.1a/lib/posix/regex.h ---- grep-2.5.1a.orig/lib/posix/regex.h 2001-04-02 23:56:50.000000000 +0600 -+++ grep-2.5.1a/lib/posix/regex.h 2005-10-23 09:49:31.000000000 +0600 -@@ -109,6 +109,10 @@ - If not set, \{, \}, {, and } are literals. */ - #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -+/* If this bit is set, then ignore case when matching. -+ If not set, then case is significant. */ -+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) -+ - /* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ - #define RE_LIMITED_OPS (RE_INTERVALS << 1) -diff -urN grep-2.5.1a.orig/src/dfa.c grep-2.5.1a/src/dfa.c ---- grep-2.5.1a.orig/src/dfa.c 2001-09-26 22:57:55.000000000 +0600 -+++ grep-2.5.1a/src/dfa.c 2005-10-23 09:49:17.000000000 +0600 -@@ -414,7 +414,7 @@ - - /* This function fetch a wide character, and update cur_mb_len, - used only if the current locale is a multibyte environment. */ --static wchar_t -+static wint_t - fetch_wc (char const *eoferr) - { - wchar_t wc; -@@ -423,7 +423,7 @@ - if (eoferr != 0) - dfaerror (eoferr); - else -- return -1; -+ return WEOF; - } - - cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs); -@@ -459,7 +459,7 @@ - static void - parse_bracket_exp_mb () - { -- wchar_t wc, wc1, wc2; -+ wint_t wc, wc1, wc2; - - /* Work area to build a mb_char_classes. */ - struct mb_char_classes *work_mbc; -@@ -496,7 +496,7 @@ - work_mbc->invert = 0; - do - { -- wc1 = -1; /* mark wc1 is not initialized". */ -+ wc1 = WEOF; /* mark wc1 is not initialized". */ - - /* Note that if we're looking at some other [:...:] construct, - we just treat it as a bunch of ordinary characters. We can do -@@ -586,7 +586,7 @@ - work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem; - } - } -- wc = -1; -+ wc1 = wc = WEOF; - } - else - /* We treat '[' as a normal character here. */ -@@ -600,7 +600,7 @@ - wc = fetch_wc(("Unbalanced [")); - } - -- if (wc1 == -1) -+ if (wc1 == WEOF) - wc1 = fetch_wc(_("Unbalanced [")); - - if (wc1 == L'-') -@@ -630,17 +630,17 @@ - } - REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, - range_sts_al, work_mbc->nranges + 1); -- work_mbc->range_sts[work_mbc->nranges] = wc; -+ work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc; - REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, - range_ends_al, work_mbc->nranges + 1); -- work_mbc->range_ends[work_mbc->nranges++] = wc2; -+ work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2; - } -- else if (wc != -1) -+ else if (wc != WEOF) - /* build normal characters. */ - { - REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, - work_mbc->nchars + 1); -- work_mbc->chars[work_mbc->nchars++] = wc; -+ work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; - } - } - while ((wc = wc1) != L']'); -@@ -2552,6 +2552,8 @@ - } - - /* match with a character? */ -+ if (case_fold) -+ wc = towlower (wc); - for (i = 0; inchars; i++) - { - if (wc == work_mbc->chars[i]) -diff -urN grep-2.5.1a.orig/src/grep.c grep-2.5.1a/src/grep.c ---- grep-2.5.1a.orig/src/grep.c 2004-11-12 16:25:35.000000000 +0500 -+++ grep-2.5.1a/src/grep.c 2005-10-23 09:50:06.000000000 +0600 -@@ -30,6 +30,12 @@ - # include - # include - #endif -+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC -+/* We can handle multibyte string. */ -+# define MBS_SUPPORT -+# include -+# include -+#endif - #include - #include "system.h" - #include "getopt.h" -@@ -558,33 +564,6 @@ - { - size_t match_size; - size_t match_offset; -- if(match_icase) -- { -- /* Yuck, this is tricky */ -- char *buf = (char*) xmalloc (lim - beg); -- char *ibeg = buf; -- char *ilim = ibeg + (lim - beg); -- int i; -- for (i = 0; i < lim - beg; i++) -- ibeg[i] = tolower (beg[i]); -- while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1)) -- != (size_t) -1) -- { -- char const *b = beg + match_offset; -- if (b == lim) -- break; -- fwrite (beg, sizeof (char), match_offset, stdout); -- printf ("\33[%sm", grep_color); -- fwrite (b, sizeof (char), match_size, stdout); -- fputs ("\33[00m", stdout); -- beg = b + match_size; -- ibeg = ibeg + match_offset + match_size; -- } -- fwrite (beg, 1, lim - beg, stdout); -- free (buf); -- lastout = lim; -- return; -- } - while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1)) - != (size_t) -1) - { -@@ -601,6 +580,7 @@ - fputs ("\33[00m", stdout); - beg = b + match_size; - } -+ fputs ("\33[K", stdout); - } - fwrite (beg, 1, lim - beg, stdout); - if (ferror (stdout)) -@@ -1697,6 +1677,37 @@ - if (!install_matcher (matcher) && !install_matcher ("default")) - abort (); - -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX != 1 && match_icase) -+ { -+ wchar_t wc; -+ mbstate_t cur_state, prev_state; -+ int i, len = strlen(keys); -+ -+ memset(&cur_state, 0, sizeof(mbstate_t)); -+ for (i = 0; i <= len ;) -+ { -+ size_t mbclen; -+ mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state); -+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -+ { -+ /* An invalid sequence, or a truncated multibyte character. -+ We treat it as a singlebyte character. */ -+ mbclen = 1; -+ } -+ else -+ { -+ if (iswupper((wint_t)wc)) -+ { -+ wc = towlower((wint_t)wc); -+ wcrtomb(keys + i, wc, &cur_state); -+ } -+ } -+ i += mbclen; -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ - (*compile)(keys, keycc); - - if ((argc - optind > 1 && !no_filenames) || with_filenames) -diff -urN grep-2.5.1a.orig/src/search.c grep-2.5.1a/src/search.c ---- grep-2.5.1a.orig/src/search.c 2001-04-19 09:42:14.000000000 +0600 -+++ grep-2.5.1a/src/search.c 2005-10-23 09:51:25.000000000 +0600 -@@ -18,9 +18,13 @@ - - /* Written August 1992 by Mike Haertel. */ - -+#ifndef _GNU_SOURCE -+# define _GNU_SOURCE 1 -+#endif - #ifdef HAVE_CONFIG_H - # include - #endif -+#include - #include - #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC - /* We can handle multibyte string. */ -@@ -31,7 +35,7 @@ - - #include "system.h" - #include "grep.h" --#include "regex.h" -+#include - #include "dfa.h" - #include "kwset.h" - #include "error.h" -@@ -39,6 +43,9 @@ - #ifdef HAVE_LIBPCRE - # include - #endif -+#ifdef HAVE_LANGINFO_CODESET -+# include -+#endif - - #define NCHAR (UCHAR_MAX + 1) - -@@ -70,9 +77,10 @@ - call the regexp matcher at all. */ - static int kwset_exact_matches; - --#if defined(MBS_SUPPORT) --static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); --#endif -+/* UTF-8 encoding allows some optimizations that we can't otherwise -+ assume in a multibyte encoding. */ -+static int using_utf8; -+ - static void kwsinit PARAMS ((void)); - static void kwsmusts PARAMS ((void)); - static void Gcompile PARAMS ((char const *, size_t)); -@@ -84,6 +92,15 @@ - static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); - - void -+check_utf8 (void) -+{ -+#ifdef HAVE_LANGINFO_CODESET -+ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) -+ using_utf8 = 1; -+#endif -+} -+ -+void - dfaerror (char const *mesg) - { - error (2, 0, mesg); -@@ -141,38 +158,6 @@ - } - } - --#ifdef MBS_SUPPORT --/* This function allocate the array which correspond to "buf". -- Then this check multibyte string and mark on the positions which -- are not singlebyte character nor the first byte of a multibyte -- character. Caller must free the array. */ --static char* --check_multibyte_string(char const *buf, size_t size) --{ -- char *mb_properties = malloc(size); -- mbstate_t cur_state; -- int i; -- memset(&cur_state, 0, sizeof(mbstate_t)); -- memset(mb_properties, 0, sizeof(char)*size); -- for (i = 0; i < size ;) -- { -- size_t mbclen; -- mbclen = mbrlen(buf + i, size - i, &cur_state); -- -- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -- { -- /* An invalid sequence, or a truncated multibyte character. -- We treat it as a singlebyte character. */ -- mbclen = 1; -- } -- mb_properties[i] = mbclen; -- i += mbclen; -- } -- -- return mb_properties; --} --#endif -- - static void - Gcompile (char const *pattern, size_t size) - { -@@ -181,7 +166,8 @@ - size_t total = size; - char const *motif = pattern; - -- re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); -+ check_utf8 (); -+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); - - /* For GNU regex compiler we have to pass the patterns separately to detect -@@ -233,7 +219,7 @@ - static char const line_end[] = "\\)$"; - static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\("; - static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)"; -- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); -+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); - size_t i; - strcpy (n, match_lines ? line_beg : word_beg); - i = strlen (n); -@@ -257,14 +243,15 @@ - size_t total = size; - char const *motif = pattern; - -+ check_utf8 (); - if (strcmp (matcher, "awk") == 0) - { -- re_set_syntax (RE_SYNTAX_AWK); -+ re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte); - } - else - { -- re_set_syntax (RE_SYNTAX_POSIX_EGREP); -+ re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0)); - dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte); - } - -@@ -316,7 +303,7 @@ - static char const line_end[] = ")$"; - static char const word_beg[] = "(^|[^[:alnum:]_])("; - static char const word_end[] = ")([^[:alnum:]_]|$)"; -- char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); -+ char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end); - size_t i; - strcpy (n, match_lines ? line_beg : word_beg); - i = strlen(n); -@@ -339,15 +326,35 @@ - char eol = eolbyte; - int backref, start, len; - struct kwsmatch kwsm; -- size_t i; -+ size_t i, ret_val; -+ static int use_dfa; -+ static int use_dfa_checked = 0; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -+ const char *last_char = NULL; -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); - #endif /* MBS_SUPPORT */ - -+ if (!use_dfa_checked) -+ { -+ char *grep_use_dfa = getenv ("GREP_USE_DFA"); -+ if (!grep_use_dfa) -+ { - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && kwset) -- mb_properties = check_multibyte_string(buf, size); -+ /* Turn off DFA when processing multibyte input. */ -+ use_dfa = (MB_CUR_MAX == 1); -+#else -+ use_dfa = 1; - #endif /* MBS_SUPPORT */ -+ } -+ else -+ { -+ use_dfa = atoi (grep_use_dfa); -+ } -+ -+ use_dfa_checked = 1; -+ } - - buflim = buf + size; - -@@ -358,47 +365,124 @@ - if (kwset) - { - /* Find a possible match using the KWset matcher. */ -- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ -+ size_t offset; -+#ifdef MBS_SUPPORT -+ /* kwsexec doesn't work with match_icase and multibyte input. */ -+ if (match_icase && mb_cur_max > 1) -+ /* Avoid kwset */ -+ offset = 0; -+ else -+#endif /* MBS_SUPPORT */ -+ offset = kwsexec (kwset, beg, buflim - beg, &kwsm); - if (offset == (size_t) -1) -- { -+ goto failure; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free(mb_properties); --#endif -- return (size_t)-1; -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } - } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - /* Narrow down to the line containing the candidate, and - run it through DFA. */ - end = memchr(beg, eol, buflim - beg); - end++; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) -+ if (mb_cur_max > 1 && bytes_left) - continue; --#endif -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; -- if (kwsm.index < kwset_exact_matches) -- goto success; -- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) -+ if ( -+#ifdef MBS_SUPPORT -+ !(match_icase && mb_cur_max > 1) && -+#endif /* MBS_SUPPORT */ -+ (kwsm.index < kwset_exact_matches)) -+ goto success_in_beg_and_end; -+ if (use_dfa && -+ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; - } - else - { - /* No good fixed strings; start with DFA. */ -- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ -+ size_t offset = 0; -+ if (use_dfa) -+ offset = dfaexec (&dfa, beg, buflim - beg, &backref); - if (offset == (size_t) -1) - break; - /* Narrow down to the line we've found. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - end = memchr (beg, eol, buflim - beg); - end++; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && bytes_left) -+ continue; -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; - } - /* Successful, no backreferences encountered! */ -- if (!backref) -- goto success; -+ if (use_dfa && !backref) -+ goto success_in_beg_and_end; - } - else - end = beg + size; -@@ -413,14 +497,11 @@ - end - beg - 1, &(patterns[i].regs)))) - { - len = patterns[i].regs.end[0] - start; -- if (exact) -- { -- *match_size = len; -- return start; -- } -+ if (exact && !match_words) -+ goto success_in_start_and_len; - if ((!match_lines && !match_words) - || (match_lines && len == end - beg - 1)) -- goto success; -+ goto success_in_beg_and_end; - /* If -w, check if the match aligns with word boundaries. - We do this iteratively because: - (a) the line may contain more than one occurence of the -@@ -431,10 +512,84 @@ - if (match_words) - while (start >= 0) - { -- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) -- && (len == end - beg - 1 -- || !WCHAR ((unsigned char) beg[start + len]))) -- goto success; -+ int lword_match = 0; -+ if (start == 0) -+ lword_match = 1; -+ else -+ { -+ assert (start > 0); -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ int mr; -+ wchar_t pwc; -+ -+ if (using_utf8) -+ { -+ s = beg + start - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ s = last_char; -+ mr = mbtowc (&pwc, s, beg + start - s); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ lword_match = 1; -+ } -+ else if (!(iswalnum (pwc) || pwc == L'_') -+ && mr == (int) (beg + start - s)) -+ lword_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (!WCHAR ((unsigned char) beg[start - 1])) -+ lword_match = 1; -+ } -+ -+ if (lword_match) -+ { -+ int rword_match = 0; -+ if (start + len == end - beg - 1) -+ rword_match = 1; -+ else -+ { -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; -+ -+ mr = mbtowc (&nwc, beg + start + len, -+ end - beg - start - len - 1); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ rword_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ rword_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (!WCHAR ((unsigned char) beg[start + len])) -+ rword_match = 1; -+ } -+ -+ if (rword_match) -+ { -+ if (!exact) -+ /* Returns the whole line. */ -+ goto success_in_beg_and_end; -+ else -+ /* Returns just this word match. */ -+ goto success_in_start_and_len; -+ } -+ } - if (len > 0) - { - /* Try a shorter length anchored at the same place. */ -@@ -461,26 +616,154 @@ - } - } /* for Regex patterns. */ - } /* for (beg = end ..) */ --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties) -- free (mb_properties); --#endif /* MBS_SUPPORT */ -+ -+ failure: - return (size_t) -1; - -- success: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties) -- free (mb_properties); --#endif /* MBS_SUPPORT */ -- *match_size = end - beg; -- return beg - buf; -+ success_in_beg_and_end: -+ len = end - beg; -+ start = beg - buf; -+ /* FALLTHROUGH */ -+ -+ success_in_start_and_len: -+ *match_size = len; -+ return start; - } - -+#ifdef MBS_SUPPORT -+static int f_i_multibyte; /* whether we're using the new -Fi MB method */ -+static struct -+{ -+ wchar_t **patterns; -+ size_t count, maxlen; -+ unsigned char *match; -+} Fimb; -+#endif -+ - static void - Fcompile (char const *pattern, size_t size) - { -+ int mb_cur_max = MB_CUR_MAX; - char const *beg, *lim, *err; - -+ check_utf8 (); -+#ifdef MBS_SUPPORT -+ /* Support -F -i for UTF-8 input. */ -+ if (match_icase && mb_cur_max > 1) -+ { -+ mbstate_t mbs; -+ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); -+ const char *patternend = pattern; -+ size_t wcsize; -+ kwset_t fimb_kwset = NULL; -+ char *starts = NULL; -+ wchar_t *wcbeg, *wclim; -+ size_t allocated = 0; -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+# ifdef __GNU_LIBRARY__ -+ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); -+ if (patternend != pattern + size) -+ wcsize = (size_t) -1; -+# else -+ { -+ char *patterncopy = xmalloc (size + 1); -+ -+ memcpy (patterncopy, pattern, size); -+ patterncopy[size] = '\0'; -+ patternend = patterncopy; -+ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); -+ if (patternend != patterncopy + size) -+ wcsize = (size_t) -1; -+ free (patterncopy); -+ } -+# endif -+ if (wcsize + 2 <= 2) -+ { -+fimb_fail: -+ free (wcpattern); -+ free (starts); -+ if (fimb_kwset) -+ kwsfree (fimb_kwset); -+ free (Fimb.patterns); -+ Fimb.patterns = NULL; -+ } -+ else -+ { -+ if (!(fimb_kwset = kwsalloc (NULL))) -+ error (2, 0, _("memory exhausted")); -+ -+ starts = xmalloc (mb_cur_max * 3); -+ wcbeg = wcpattern; -+ do -+ { -+ int i; -+ size_t wclen; -+ -+ if (Fimb.count >= allocated) -+ { -+ if (allocated == 0) -+ allocated = 128; -+ else -+ allocated *= 2; -+ Fimb.patterns = xrealloc (Fimb.patterns, -+ sizeof (wchar_t *) * allocated); -+ } -+ Fimb.patterns[Fimb.count++] = wcbeg; -+ for (wclim = wcbeg; -+ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) -+ *wclim = towlower (*wclim); -+ *wclim = L'\0'; -+ wclen = wclim - wcbeg; -+ if (wclen > Fimb.maxlen) -+ Fimb.maxlen = wclen; -+ if (wclen > 3) -+ wclen = 3; -+ if (wclen == 0) -+ { -+ if ((err = kwsincr (fimb_kwset, "", 0)) != 0) -+ error (2, 0, err); -+ } -+ else -+ for (i = 0; i < (1 << wclen); i++) -+ { -+ char *p = starts; -+ int j, k; -+ -+ for (j = 0; j < wclen; ++j) -+ { -+ wchar_t wc = wcbeg[j]; -+ if (i & (1 << j)) -+ { -+ wc = towupper (wc); -+ if (wc == wcbeg[j]) -+ continue; -+ } -+ k = wctomb (p, wc); -+ if (k <= 0) -+ goto fimb_fail; -+ p += k; -+ } -+ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) -+ error (2, 0, err); -+ } -+ if (wclim < wcpattern + wcsize) -+ ++wclim; -+ wcbeg = wclim; -+ } -+ while (wcbeg < wcpattern + wcsize); -+ f_i_multibyte = 1; -+ kwset = fimb_kwset; -+ free (starts); -+ Fimb.match = xmalloc (Fimb.count); -+ if ((err = kwsprep (kwset)) != 0) -+ error (2, 0, err); -+ return; -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ -+ - kwsinit (); - beg = pattern; - do -@@ -499,6 +782,76 @@ - error (2, 0, err); - } - -+#ifdef MBS_SUPPORT -+static int -+Fimbexec (const char *buf, size_t size, size_t *plen, int exact) -+{ -+ size_t len, letter, i; -+ int ret = -1; -+ mbstate_t mbs; -+ wchar_t wc; -+ int patterns_left; -+ -+ assert (match_icase && f_i_multibyte == 1); -+ assert (MB_CUR_MAX > 1); -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+ memset (Fimb.match, '\1', Fimb.count); -+ letter = len = 0; -+ patterns_left = 1; -+ while (patterns_left && len <= size) -+ { -+ size_t c; -+ -+ patterns_left = 0; -+ if (len < size) -+ { -+ c = mbrtowc (&wc, buf + len, size - len, &mbs); -+ if (c + 2 <= 2) -+ return ret; -+ -+ wc = towlower (wc); -+ } -+ else -+ { -+ c = 1; -+ wc = L'\0'; -+ } -+ -+ for (i = 0; i < Fimb.count; i++) -+ { -+ if (Fimb.match[i]) -+ { -+ if (Fimb.patterns[i][letter] == L'\0') -+ { -+ /* Found a match. */ -+ *plen = len; -+ if (!exact && !match_words) -+ return 0; -+ else -+ { -+ /* For -w or exact look for longest match. */ -+ ret = 0; -+ Fimb.match[i] = '\0'; -+ continue; -+ } -+ } -+ -+ if (Fimb.patterns[i][letter] == wc) -+ patterns_left = 1; -+ else -+ Fimb.match[i] = '\0'; -+ } -+ } -+ -+ len += c; -+ letter++; -+ } -+ -+ return ret; -+} -+#endif /* MBS_SUPPORT */ -+ - static size_t - Fexecute (char const *buf, size_t size, size_t *match_size, int exact) - { -@@ -506,88 +859,268 @@ - register size_t len; - char eol = eolbyte; - struct kwsmatch kwsmatch; -+ size_t ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties; -- if (MB_CUR_MAX > 1) -- mb_properties = check_multibyte_string (buf, size); -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ const char *last_char = NULL; - #endif /* MBS_SUPPORT */ - - for (beg = buf; beg <= buf + size; ++beg) - { -- size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); -+ size_t offset; -+ offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); -+ - if (offset == (size_t) -1) -- { -+ goto failure; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free(mb_properties); --#endif /* MBS_SUPPORT */ -- return offset; -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ continue; - } --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) -- continue; /* It is a part of multibyte character. */ -+ else - #endif /* MBS_SUPPORT */ - beg += offset; -- len = kwsmatch.size[0]; -- if (exact) -- { -- *match_size = len; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free (mb_properties); -+ /* For f_i_multibyte, the string at beg now matches first 3 chars of -+ one of the search strings (less if there are shorter search strings). -+ See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact)) -+ goto next_char; - #endif /* MBS_SUPPORT */ -- return beg - buf; -- } -+ len = kwsmatch.size[0]; -+ if (exact && !match_words) -+ goto success_in_beg_and_len; - if (match_lines) - { - if (beg > buf && beg[-1] != eol) -- continue; -+ goto next_char; - if (beg + len < buf + size && beg[len] != eol) -- continue; -+ goto next_char; - goto success; - } - else if (match_words) -- for (try = beg; len; ) -- { -- if (try > buf && WCHAR((unsigned char) try[-1])) -- break; -- if (try + len < buf + size && WCHAR((unsigned char) try[len])) -- { -- offset = kwsexec (kwset, beg, --len, &kwsmatch); -- if (offset == (size_t) -1) -- { -+ { -+ while (len) -+ { -+ int word_match = 0; -+ if (beg > buf) -+ { - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free (mb_properties); -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ int mr; -+ wchar_t pwc; -+ -+ if (using_utf8) -+ { -+ s = beg - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ s = last_char; -+ mr = mbtowc (&pwc, s, beg - s); -+ if (mr <= 0) -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ else if ((iswalnum (pwc) || pwc == L'_') -+ && mr == (int) (beg - s)) -+ goto next_char; -+ } -+ else - #endif /* MBS_SUPPORT */ -- return offset; -- } -- try = beg + offset; -- len = kwsmatch.size[0]; -- } -- else -- goto success; -- } -+ if (WCHAR ((unsigned char) beg[-1])) -+ goto next_char; -+ } -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; -+ -+ mr = mbtowc (&nwc, beg + len, buf + size - beg - len); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ word_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ word_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) -+ word_match = 1; -+ if (word_match) -+ { -+ if (!exact) -+ /* Returns the whole line now we know there's a word match. */ -+ goto success; -+ else -+ /* Returns just this word match. */ -+ goto success_in_beg_and_len; -+ } -+ if (len > 0) -+ { -+ /* Try a shorter length anchored at the same place. */ -+ --len; -+ offset = kwsexec (kwset, beg, len, &kwsmatch); -+ -+ if (offset == -1) -+ goto next_char; /* Try a different anchor. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ { -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ } -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ goto next_char; /* Try a different anchor. */ -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ beg += offset; -+#ifdef MBS_SUPPORT -+ /* The string at beg now matches first 3 chars of one of -+ the search strings (less if there are shorter search -+ strings). See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, len - offset, &kwsmatch.size[0], -+ exact)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ -+ len = kwsmatch.size[0]; -+ } -+ } -+ } - else - goto success; -- } -- -+next_char:; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free (mb_properties); -+ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled -+ by ++beg above. */ -+ if (mb_cur_max > 1) -+ { -+ if (using_utf8) -+ { -+ unsigned char c = *beg; -+ if (c >= 0xc2) -+ { -+ if (c < 0xe0) -+ ++beg; -+ else if (c < 0xf0) -+ beg += 2; -+ else if (c < 0xf8) -+ beg += 3; -+ else if (c < 0xfc) -+ beg += 4; -+ else if (c < 0xfe) -+ beg += 5; -+ } -+ } -+ else -+ { -+ size_t l = mbrlen (beg, buf + size - beg, &mbs); -+ -+ last_char = beg; -+ if (l + 2 >= 2) -+ beg += l - 1; -+ else -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ } -+ } - #endif /* MBS_SUPPORT */ -+ } -+ -+ failure: - return -1; - - success: -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ end = beg + len; -+ while (end < buf + size) -+ { -+ size_t mlen = mbrlen (end, buf + size - end, &mbs); -+ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ mlen = 1; -+ } -+ if (mlen == 1 && *end == eol) -+ break; -+ -+ end += mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - end = memchr (beg + len, eol, (buf + size) - (beg + len)); -+ - end++; - while (buf < beg && beg[-1] != eol) - --beg; -- *match_size = end - beg; --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- free (mb_properties); --#endif /* MBS_SUPPORT */ -+ len = end - beg; -+ /* FALLTHROUGH */ -+ -+ success_in_beg_and_len: -+ *match_size = len; - return beg - buf; - } - -diff -urN grep-2.5.1a.orig/src/search.c.orig grep-2.5.1a/src/search.c.orig ---- grep-2.5.1a.orig/src/search.c.orig 1970-01-01 05:00:00.000000000 +0500 -+++ grep-2.5.1a/src/search.c.orig 2005-10-23 09:48:39.000000000 +0600 -@@ -0,0 +1,714 @@ -+/* search.c - searching subroutines using dfa, kwset and regex for grep. -+ Copyright 1992, 1998, 2000 Free Software Foundation, Inc. -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2, or (at your option) -+ any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -+ 02111-1307, USA. */ -+ -+/* Written August 1992 by Mike Haertel. */ -+ -+#ifdef HAVE_CONFIG_H -+# include -+#endif -+#include -+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC -+/* We can handle multibyte string. */ -+# define MBS_SUPPORT -+# include -+# include -+#endif -+ -+#include "system.h" -+#include "grep.h" -+#include "regex.h" -+#include "dfa.h" -+#include "kwset.h" -+#include "error.h" -+#include "xalloc.h" -+#ifdef HAVE_LIBPCRE -+# include -+#endif -+ -+#define NCHAR (UCHAR_MAX + 1) -+ -+/* For -w, we also consider _ to be word constituent. */ -+#define WCHAR(C) (ISALNUM(C) || (C) == '_') -+ -+/* DFA compiled regexp. */ -+static struct dfa dfa; -+ -+/* The Regex compiled patterns. */ -+static struct patterns -+{ -+ /* Regex compiled regexp. */ -+ struct re_pattern_buffer regexbuf; -+ struct re_registers regs; /* This is here on account of a BRAIN-DEAD -+ Q@#%!# library interface in regex.c. */ -+} patterns0; -+ -+struct patterns *patterns; -+size_t pcount; -+ -+/* KWset compiled pattern. For Ecompile and Gcompile, we compile -+ a list of strings, at least one of which is known to occur in -+ any string matching the regexp. */ -+static kwset_t kwset; -+ -+/* Number of compiled fixed strings known to exactly match the regexp. -+ If kwsexec returns < kwset_exact_matches, then we don't need to -+ call the regexp matcher at all. */ -+static int kwset_exact_matches; -+ -+#if defined(MBS_SUPPORT) -+static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); -+#endif -+static void kwsinit PARAMS ((void)); -+static void kwsmusts PARAMS ((void)); -+static void Gcompile PARAMS ((char const *, size_t)); -+static void Ecompile PARAMS ((char const *, size_t)); -+static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int )); -+static void Fcompile PARAMS ((char const *, size_t)); -+static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int)); -+static void Pcompile PARAMS ((char const *, size_t )); -+static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); -+ -+void -+dfaerror (char const *mesg) -+{ -+ error (2, 0, mesg); -+} -+ -+static void -+kwsinit (void) -+{ -+ static char trans[NCHAR]; -+ int i; -+ -+ if (match_icase) -+ for (i = 0; i < NCHAR; ++i) -+ trans[i] = TOLOWER (i); -+ -+ if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0))) -+ error (2, 0, _("memory exhausted")); -+} -+ -+/* If the DFA turns out to have some set of fixed strings one of -+ which must occur in the match, then we build a kwset matcher -+ to find those strings, and thus quickly filter out impossible -+ matches. */ -+static void -+kwsmusts (void) -+{ -+ struct dfamust const *dm; -+ char const *err; -+ -+ if (dfa.musts) -+ { -+ kwsinit (); -+ /* First, we compile in the substrings known to be exact -+ matches. The kwset matcher will return the index -+ of the matching string that it chooses. */ -+ for (dm = dfa.musts; dm; dm = dm->next) -+ { -+ if (!dm->exact) -+ continue; -+ ++kwset_exact_matches; -+ if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0) -+ error (2, 0, err); -+ } -+ /* Now, we compile the substrings that will require -+ the use of the regexp matcher. */ -+ for (dm = dfa.musts; dm; dm = dm->next) -+ { -+ if (dm->exact) -+ continue; -+ if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0) -+ error (2, 0, err); -+ } -+ if ((err = kwsprep (kwset)) != 0) -+ error (2, 0, err); -+ } -+} -+ -+#ifdef MBS_SUPPORT -+/* This function allocate the array which correspond to "buf". -+ Then this check multibyte string and mark on the positions which -+ are not singlebyte character nor the first byte of a multibyte -+ character. Caller must free the array. */ -+static char* -+check_multibyte_string(char const *buf, size_t size) -+{ -+ char *mb_properties = malloc(size); -+ mbstate_t cur_state; -+ int i; -+ memset(&cur_state, 0, sizeof(mbstate_t)); -+ memset(mb_properties, 0, sizeof(char)*size); -+ for (i = 0; i < size ;) -+ { -+ size_t mbclen; -+ mbclen = mbrlen(buf + i, size - i, &cur_state); -+ -+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -+ { -+ /* An invalid sequence, or a truncated multibyte character. -+ We treat it as a singlebyte character. */ -+ mbclen = 1; -+ } -+ mb_properties[i] = mbclen; -+ i += mbclen; -+ } -+ -+ return mb_properties; -+} -+#endif -+ -+static void -+Gcompile (char const *pattern, size_t size) -+{ -+ const char *err; -+ char const *sep; -+ size_t total = size; -+ char const *motif = pattern; -+ -+ re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); -+ dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); -+ -+ /* For GNU regex compiler we have to pass the patterns separately to detect -+ errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]" -+ GNU regex should have raise a syntax error. The same for backref, where -+ the backref should have been local to each pattern. */ -+ do -+ { -+ size_t len; -+ sep = memchr (motif, '\n', total); -+ if (sep) -+ { -+ len = sep - motif; -+ sep++; -+ total -= (len + 1); -+ } -+ else -+ { -+ len = total; -+ total = 0; -+ } -+ -+ patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns)); -+ if (patterns == NULL) -+ error (2, errno, _("memory exhausted")); -+ -+ patterns[pcount] = patterns0; -+ -+ if ((err = re_compile_pattern (motif, len, -+ &(patterns[pcount].regexbuf))) != 0) -+ error (2, 0, err); -+ pcount++; -+ -+ motif = sep; -+ } while (sep && total != 0); -+ -+ /* In the match_words and match_lines cases, we use a different pattern -+ for the DFA matcher that will quickly throw out cases that won't work. -+ Then if DFA succeeds we do some hairy stuff using the regex matcher -+ to decide whether the match should really count. */ -+ if (match_words || match_lines) -+ { -+ /* In the whole-word case, we use the pattern: -+ \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]|$\). -+ In the whole-line case, we use the pattern: -+ ^\(userpattern\)$. */ -+ -+ static char const line_beg[] = "^\\("; -+ static char const line_end[] = "\\)$"; -+ static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\("; -+ static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)"; -+ char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); -+ size_t i; -+ strcpy (n, match_lines ? line_beg : word_beg); -+ i = strlen (n); -+ memcpy (n + i, pattern, size); -+ i += size; -+ strcpy (n + i, match_lines ? line_end : word_end); -+ i += strlen (n + i); -+ pattern = n; -+ size = i; -+ } -+ -+ dfacomp (pattern, size, &dfa, 1); -+ kwsmusts (); -+} -+ -+static void -+Ecompile (char const *pattern, size_t size) -+{ -+ const char *err; -+ const char *sep; -+ size_t total = size; -+ char const *motif = pattern; -+ -+ if (strcmp (matcher, "awk") == 0) -+ { -+ re_set_syntax (RE_SYNTAX_AWK); -+ dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte); -+ } -+ else -+ { -+ re_set_syntax (RE_SYNTAX_POSIX_EGREP); -+ dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte); -+ } -+ -+ /* For GNU regex compiler we have to pass the patterns separately to detect -+ errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]" -+ GNU regex should have raise a syntax error. The same for backref, where -+ the backref should have been local to each pattern. */ -+ do -+ { -+ size_t len; -+ sep = memchr (motif, '\n', total); -+ if (sep) -+ { -+ len = sep - motif; -+ sep++; -+ total -= (len + 1); -+ } -+ else -+ { -+ len = total; -+ total = 0; -+ } -+ -+ patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns)); -+ if (patterns == NULL) -+ error (2, errno, _("memory exhausted")); -+ patterns[pcount] = patterns0; -+ -+ if ((err = re_compile_pattern (motif, len, -+ &(patterns[pcount].regexbuf))) != 0) -+ error (2, 0, err); -+ pcount++; -+ -+ motif = sep; -+ } while (sep && total != 0); -+ -+ /* In the match_words and match_lines cases, we use a different pattern -+ for the DFA matcher that will quickly throw out cases that won't work. -+ Then if DFA succeeds we do some hairy stuff using the regex matcher -+ to decide whether the match should really count. */ -+ if (match_words || match_lines) -+ { -+ /* In the whole-word case, we use the pattern: -+ (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$). -+ In the whole-line case, we use the pattern: -+ ^(userpattern)$. */ -+ -+ static char const line_beg[] = "^("; -+ static char const line_end[] = ")$"; -+ static char const word_beg[] = "(^|[^[:alnum:]_])("; -+ static char const word_end[] = ")([^[:alnum:]_]|$)"; -+ char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end); -+ size_t i; -+ strcpy (n, match_lines ? line_beg : word_beg); -+ i = strlen(n); -+ memcpy (n + i, pattern, size); -+ i += size; -+ strcpy (n + i, match_lines ? line_end : word_end); -+ i += strlen (n + i); -+ pattern = n; -+ size = i; -+ } -+ -+ dfacomp (pattern, size, &dfa, 1); -+ kwsmusts (); -+} -+ -+static size_t -+EGexecute (char const *buf, size_t size, size_t *match_size, int exact) -+{ -+ register char const *buflim, *beg, *end; -+ char eol = eolbyte; -+ int backref, start, len; -+ struct kwsmatch kwsm; -+ size_t i; -+#ifdef MBS_SUPPORT -+ char *mb_properties = NULL; -+#endif /* MBS_SUPPORT */ -+ -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX > 1 && kwset) -+ mb_properties = check_multibyte_string(buf, size); -+#endif /* MBS_SUPPORT */ -+ -+ buflim = buf + size; -+ -+ for (beg = end = buf; end < buflim; beg = end) -+ { -+ if (!exact) -+ { -+ if (kwset) -+ { -+ /* Find a possible match using the KWset matcher. */ -+ size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); -+ if (offset == (size_t) -1) -+ goto failure; -+ beg += offset; -+ /* Narrow down to the line containing the candidate, and -+ run it through DFA. */ -+ end = memchr(beg, eol, buflim - beg); -+ end++; -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) -+ continue; -+#endif -+ while (beg > buf && beg[-1] != eol) -+ --beg; -+ if (kwsm.index < kwset_exact_matches) -+ goto success_in_beg_and_end; -+ if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) -+ continue; -+ } -+ else -+ { -+ /* No good fixed strings; start with DFA. */ -+ size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); -+ if (offset == (size_t) -1) -+ break; -+ /* Narrow down to the line we've found. */ -+ beg += offset; -+ end = memchr (beg, eol, buflim - beg); -+ end++; -+ while (beg > buf && beg[-1] != eol) -+ --beg; -+ } -+ /* Successful, no backreferences encountered! */ -+ if (!backref) -+ goto success_in_beg_and_end; -+ } -+ else -+ end = beg + size; -+ -+ /* If we've made it to this point, this means DFA has seen -+ a probable match, and we need to run it through Regex. */ -+ for (i = 0; i < pcount; i++) -+ { -+ patterns[i].regexbuf.not_eol = 0; -+ if (0 <= (start = re_search (&(patterns[i].regexbuf), beg, -+ end - beg - 1, 0, -+ end - beg - 1, &(patterns[i].regs)))) -+ { -+ len = patterns[i].regs.end[0] - start; -+ if (exact && !match_words) -+ goto success_in_start_and_len; -+ if ((!match_lines && !match_words) -+ || (match_lines && len == end - beg - 1)) -+ goto success_in_beg_and_end; -+ /* If -w, check if the match aligns with word boundaries. -+ We do this iteratively because: -+ (a) the line may contain more than one occurence of the -+ pattern, and -+ (b) Several alternatives in the pattern might be valid at a -+ given point, and we may need to consider a shorter one to -+ find a word boundary. */ -+ if (match_words) -+ while (start >= 0) -+ { -+ if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) -+ && (len == end - beg - 1 -+ || !WCHAR ((unsigned char) beg[start + len]))) -+ goto success_in_beg_and_end; -+ if (len > 0) -+ { -+ /* Try a shorter length anchored at the same place. */ -+ --len; -+ patterns[i].regexbuf.not_eol = 1; -+ len = re_match (&(patterns[i].regexbuf), beg, -+ start + len, start, -+ &(patterns[i].regs)); -+ } -+ if (len <= 0) -+ { -+ /* Try looking further on. */ -+ if (start == end - beg - 1) -+ break; -+ ++start; -+ patterns[i].regexbuf.not_eol = 0; -+ start = re_search (&(patterns[i].regexbuf), beg, -+ end - beg - 1, -+ start, end - beg - 1 - start, -+ &(patterns[i].regs)); -+ len = patterns[i].regs.end[0] - start; -+ } -+ } -+ } -+ } /* for Regex patterns. */ -+ } /* for (beg = end ..) */ -+ -+ failure: -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX > 1 && mb_properties) -+ free (mb_properties); -+#endif /* MBS_SUPPORT */ -+ return (size_t) -1; -+ -+ success_in_beg_and_end: -+ len = end - beg; -+ start = beg - buf; -+ /* FALLTHROUGH */ -+ -+ success_in_start_and_len: -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX > 1 && mb_properties) -+ free (mb_properties); -+#endif /* MBS_SUPPORT */ -+ *match_size = len; -+ return start; -+} -+ -+static void -+Fcompile (char const *pattern, size_t size) -+{ -+ char const *beg, *lim, *err; -+ -+ kwsinit (); -+ beg = pattern; -+ do -+ { -+ for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim) -+ ; -+ if ((err = kwsincr (kwset, beg, lim - beg)) != 0) -+ error (2, 0, err); -+ if (lim < pattern + size) -+ ++lim; -+ beg = lim; -+ } -+ while (beg < pattern + size); -+ -+ if ((err = kwsprep (kwset)) != 0) -+ error (2, 0, err); -+} -+ -+static size_t -+Fexecute (char const *buf, size_t size, size_t *match_size, int exact) -+{ -+ register char const *beg, *try, *end; -+ register size_t len; -+ char eol = eolbyte; -+ struct kwsmatch kwsmatch; -+#ifdef MBS_SUPPORT -+ char *mb_properties; -+ if (MB_CUR_MAX > 1) -+ mb_properties = check_multibyte_string (buf, size); -+#endif /* MBS_SUPPORT */ -+ -+ for (beg = buf; beg <= buf + size; ++beg) -+ { -+ size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); -+ if (offset == (size_t) -1) -+ goto failure; -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) -+ continue; /* It is a part of multibyte character. */ -+#endif /* MBS_SUPPORT */ -+ beg += offset; -+ len = kwsmatch.size[0]; -+ if (exact && !match_words) -+ goto success_in_beg_and_len; -+ if (match_lines) -+ { -+ if (beg > buf && beg[-1] != eol) -+ continue; -+ if (beg + len < buf + size && beg[len] != eol) -+ continue; -+ goto success; -+ } -+ else if (match_words) -+ for (try = beg; len; ) -+ { -+ if (try > buf && WCHAR((unsigned char) try[-1])) -+ break; -+ if (try + len < buf + size && WCHAR((unsigned char) try[len])) -+ { -+ offset = kwsexec (kwset, beg, --len, &kwsmatch); -+ if (offset == (size_t) -1) -+ { -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX > 1) -+ free (mb_properties); -+#endif /* MBS_SUPPORT */ -+ return offset; -+ } -+ try = beg + offset; -+ len = kwsmatch.size[0]; -+ } -+ else -+ goto success; -+ } -+ else -+ goto success; -+ } -+ -+ failure: -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX > 1) -+ free (mb_properties); -+#endif /* MBS_SUPPORT */ -+ return -1; -+ -+ success: -+ end = memchr (beg + len, eol, (buf + size) - (beg + len)); -+ end++; -+ while (buf < beg && beg[-1] != eol) -+ --beg; -+ len = end - beg; -+ /* FALLTHROUGH */ -+ -+ success_in_beg_and_len: -+ *match_size = len; -+#ifdef MBS_SUPPORT -+ if (MB_CUR_MAX > 1) -+ free (mb_properties); -+#endif /* MBS_SUPPORT */ -+ return beg - buf; -+} -+ -+#if HAVE_LIBPCRE -+/* Compiled internal form of a Perl regular expression. */ -+static pcre *cre; -+ -+/* Additional information about the pattern. */ -+static pcre_extra *extra; -+#endif -+ -+static void -+Pcompile (char const *pattern, size_t size) -+{ -+#if !HAVE_LIBPCRE -+ error (2, 0, _("The -P option is not supported")); -+#else -+ int e; -+ char const *ep; -+ char *re = xmalloc (4 * size + 7); -+ int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0); -+ char const *patlim = pattern + size; -+ char *n = re; -+ char const *p; -+ char const *pnul; -+ -+ /* FIXME: Remove this restriction. */ -+ if (eolbyte != '\n') -+ error (2, 0, _("The -P and -z options cannot be combined")); -+ -+ *n = '\0'; -+ if (match_lines) -+ strcpy (n, "^("); -+ if (match_words) -+ strcpy (n, "\\b("); -+ n += strlen (n); -+ -+ /* The PCRE interface doesn't allow NUL bytes in the pattern, so -+ replace each NUL byte in the pattern with the four characters -+ "\000", removing a preceding backslash if there are an odd -+ number of backslashes before the NUL. -+ -+ FIXME: This method does not work with some multibyte character -+ encodings, notably Shift-JIS, where a multibyte character can end -+ in a backslash byte. */ -+ for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1) -+ { -+ memcpy (n, p, pnul - p); -+ n += pnul - p; -+ for (p = pnul; pattern < p && p[-1] == '\\'; p--) -+ continue; -+ n -= (pnul - p) & 1; -+ strcpy (n, "\\000"); -+ n += 4; -+ } -+ -+ memcpy (n, p, patlim - p); -+ n += patlim - p; -+ *n = '\0'; -+ if (match_words) -+ strcpy (n, ")\\b"); -+ if (match_lines) -+ strcpy (n, ")$"); -+ -+ cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ()); -+ if (!cre) -+ error (2, 0, ep); -+ -+ extra = pcre_study (cre, 0, &ep); -+ if (ep) -+ error (2, 0, ep); -+ -+ free (re); -+#endif -+} -+ -+static size_t -+Pexecute (char const *buf, size_t size, size_t *match_size, int exact) -+{ -+#if !HAVE_LIBPCRE -+ abort (); -+ return -1; -+#else -+ /* This array must have at least two elements; everything after that -+ is just for performance improvement in pcre_exec. */ -+ int sub[300]; -+ -+ int e = pcre_exec (cre, extra, buf, size, 0, 0, -+ sub, sizeof sub / sizeof *sub); -+ -+ if (e <= 0) -+ { -+ switch (e) -+ { -+ case PCRE_ERROR_NOMATCH: -+ return -1; -+ -+ case PCRE_ERROR_NOMEMORY: -+ error (2, 0, _("Memory exhausted")); -+ -+ default: -+ abort (); -+ } -+ } -+ else -+ { -+ /* Narrow down to the line we've found. */ -+ char const *beg = buf + sub[0]; -+ char const *end = buf + sub[1]; -+ char const *buflim = buf + size; -+ char eol = eolbyte; -+ if (!exact) -+ { -+ end = memchr (end, eol, buflim - end); -+ end++; -+ while (buf < beg && beg[-1] != eol) -+ --beg; -+ } -+ -+ *match_size = end - beg; -+ return beg - buf; -+ } -+#endif -+} -+ -+struct matcher const matchers[] = { -+ { "default", Gcompile, EGexecute }, -+ { "grep", Gcompile, EGexecute }, -+ { "egrep", Ecompile, EGexecute }, -+ { "awk", Ecompile, EGexecute }, -+ { "fgrep", Fcompile, Fexecute }, -+ { "perl", Pcompile, Pexecute }, -+ { "", 0, 0 }, -+}; -diff -urN grep-2.5.1a.orig/tests/fmbtest.sh grep-2.5.1a/tests/fmbtest.sh ---- grep-2.5.1a.orig/tests/fmbtest.sh 1970-01-01 05:00:00.000000000 +0500 -+++ grep-2.5.1a/tests/fmbtest.sh 2005-10-23 09:51:12.000000000 +0600 -@@ -0,0 +1,111 @@ -+#!/bin/sh -+ -+: ${srcdir=.} -+ -+# If cs_CZ.UTF-8 locale doesn't work, skip this test silently -+LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \ -+ || exit 77 -+ -+failures=0 -+ -+cat > csinput < cspatfile <