[people/teissler/ipfire-2.x.git] / src / patches / grep-2.5.1a-redhat_fixes-2.patch

Submitted by: Alexander E. Patrakov
Date: 2005-08-13
Initial Package Version: 2.5.1a
Upstream Status: Partially accepted, partially rejected, but required for LSB >= 2.0 certification
Origin: RedHat
Description: Various fixes from RedHat. Individual patches:

   grep-2.5.1-fgrep.patch
   grep-2.5.1-bracket.patch
   grep-2.5-i18n.patch
   grep-2.5.1-oi.patch
   grep-2.5.1-manpage.patch
   grep-2.5.1-color.patch
   grep-2.5.1-icolor.patch
   grep-2.5.1-egf-speedup.patch
   grep-2.5.1-dfa-optional.patch
   grep-2.5.1-tests.patch
   grep-2.5.1-w.patch

Testcases:

 -fgrep: ???, but required for other patches
 -bracket: echo "[" | LANG=en_US.UTF-8 grep "[[:space:]]"
 -i18n: many fixes for multibyte locale support, required for LSB.
 -oi: echo xxYYzz | LANG=C grep -i -o yy
 -manpage: typo
 -color: restore the background color correctly
 -icolor: ??? echo 'spam foo SPAM FOO' | grep -i --color spam
     (but that's also fixed by -oi. Is this patch just a cleanup?)
 -egf-speedup: without this, grep is as slow as a snail in UTF-8 locales.
 -dfa-optional: disables dfa in multibyte locales by default.
 -w: (echo 'foo';echo 'fo') > /tmp/testfile && grep -F -w fo /tmp/testfile

diff -urN grep-2.5.1a.orig/doc/grep.1 grep-2.5.1a/doc/grep.1
--- grep-2.5.1a.orig/doc/grep.1	2004-11-12 16:26:37.000000000 +0500
+++ grep-2.5.1a/doc/grep.1	2005-10-23 09:49:43.000000000 +0600
@@ -191,6 +191,7 @@
 .I PATTERN
 as a list of fixed strings, separated by newlines,
 any of which is to be matched.
+.TP
 .BR \-P ", " \-\^\-perl-regexp
 Interpret
 .I PATTERN
@@ -302,7 +303,7 @@
 This is especially useful for tools like zgrep, e.g.
 .B "gzip -cd foo.gz |grep --label=foo something"
 .TP
-.BR \-\^\-line-buffering
+.BR \-\^\-line-buffered
 Use line buffering, it can be a performance penality.
 .TP
 .BR \-q ", " \-\^\-quiet ", " \-\^\-silent
diff -urN grep-2.5.1a.orig/lib/posix/regex.h grep-2.5.1a/lib/posix/regex.h
--- grep-2.5.1a.orig/lib/posix/regex.h	2001-04-02 23:56:50.000000000 +0600
+++ grep-2.5.1a/lib/posix/regex.h	2005-10-23 09:49:31.000000000 +0600
@@ -109,6 +109,10 @@
    If not set, \{, \}, {, and } are literals.  */
 #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
 
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
 /* If this bit is set, +, ? and | aren't recognized as operators.
    If not set, they are.  */
 #define RE_LIMITED_OPS (RE_INTERVALS << 1)
diff -urN grep-2.5.1a.orig/src/dfa.c grep-2.5.1a/src/dfa.c
--- grep-2.5.1a.orig/src/dfa.c	2001-09-26 22:57:55.000000000 +0600
+++ grep-2.5.1a/src/dfa.c	2005-10-23 09:49:17.000000000 +0600
@@ -414,7 +414,7 @@
 
 /* This function fetch a wide character, and update cur_mb_len,
    used only if the current locale is a multibyte environment.  */
-static wchar_t
+static wint_t
 fetch_wc (char const *eoferr)
 {
   wchar_t wc;
@@ -423,7 +423,7 @@
       if (eoferr != 0)
 	dfaerror (eoferr);
       else
-	return -1;
+	return WEOF;
     }
 
   cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
@@ -459,7 +459,7 @@
 static void
 parse_bracket_exp_mb ()
 {
-  wchar_t wc, wc1, wc2;
+  wint_t wc, wc1, wc2;
 
   /* Work area to build a mb_char_classes.  */
   struct mb_char_classes *work_mbc;
@@ -496,7 +496,7 @@
     work_mbc->invert = 0;
   do
     {
-      wc1 = -1; /* mark wc1 is not initialized".  */
+      wc1 = WEOF; /* mark wc1 is not initialized".  */
 
       /* Note that if we're looking at some other [:...:] construct,
 	 we just treat it as a bunch of ordinary characters.  We can do
@@ -586,7 +586,7 @@
 		      work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
 		    }
  		}
-	      wc = -1;
+	      wc1 = wc = WEOF;
 	    }
 	  else
 	    /* We treat '[' as a normal character here.  */
@@ -600,7 +600,7 @@
 	    wc = fetch_wc(("Unbalanced ["));
 	}
 
-      if (wc1 == -1)
+      if (wc1 == WEOF)
 	wc1 = fetch_wc(_("Unbalanced ["));
 
       if (wc1 == L'-')
@@ -630,17 +630,17 @@
 	    }
 	  REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
 			       range_sts_al, work_mbc->nranges + 1);
-	  work_mbc->range_sts[work_mbc->nranges] = wc;
+	  work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
 	  REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
 			       range_ends_al, work_mbc->nranges + 1);
-	  work_mbc->range_ends[work_mbc->nranges++] = wc2;
+	  work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
 	}
-      else if (wc != -1)
+      else if (wc != WEOF)
 	/* build normal characters.  */
 	{
 	  REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
 			       work_mbc->nchars + 1);
-	  work_mbc->chars[work_mbc->nchars++] = wc;
+	  work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
 	}
     }
   while ((wc = wc1) != L']');
@@ -2552,6 +2552,8 @@
     }
 
   /* match with a character?  */
+  if (case_fold)
+    wc = towlower (wc);
   for (i = 0; i<work_mbc->nchars; i++)
     {
       if (wc == work_mbc->chars[i])
diff -urN grep-2.5.1a.orig/src/grep.c grep-2.5.1a/src/grep.c
--- grep-2.5.1a.orig/src/grep.c	2004-11-12 16:25:35.000000000 +0500
+++ grep-2.5.1a/src/grep.c	2005-10-23 09:50:06.000000000 +0600
@@ -30,6 +30,12 @@
 # include <sys/time.h>
 # include <sys/resource.h>
 #endif
+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
+/* We can handle multibyte string.  */
+# define MBS_SUPPORT
+# include <wchar.h>
+# include <wctype.h>
+#endif
 #include <stdio.h>
 #include "system.h"
 #include "getopt.h"
@@ -558,33 +564,6 @@
     {
       size_t match_size;
       size_t match_offset;
-      if(match_icase)
-        {
-	  /* Yuck, this is tricky */
-          char *buf = (char*) xmalloc (lim - beg);
-	  char *ibeg = buf;
-	  char *ilim = ibeg + (lim - beg);
-	  int i;
-	  for (i = 0; i < lim - beg; i++)
-	    ibeg[i] = tolower (beg[i]);
-	  while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
-		 != (size_t) -1)
-	    {
-	      char const *b = beg + match_offset;
-	      if (b == lim)
-		break;
-	      fwrite (beg, sizeof (char), match_offset, stdout);
-	      printf ("\33[%sm", grep_color);
-	      fwrite (b, sizeof (char), match_size, stdout);
-	      fputs ("\33[00m", stdout);
-	      beg = b + match_size;
-	      ibeg = ibeg + match_offset + match_size;
-	    }
-	  fwrite (beg, 1, lim - beg, stdout);
-	  free (buf);
-	  lastout = lim;
-	  return;
-	}
       while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
 	     != (size_t) -1)
 	{
@@ -601,6 +580,7 @@
 	  fputs ("\33[00m", stdout);
 	  beg = b + match_size;
 	}
+      fputs ("\33[K", stdout);
     }
   fwrite (beg, 1, lim - beg, stdout);
   if (ferror (stdout))
@@ -1697,6 +1677,37 @@
   if (!install_matcher (matcher) && !install_matcher ("default"))
     abort ();
 
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX != 1 && match_icase)
+    {
+      wchar_t wc;
+      mbstate_t cur_state, prev_state;
+      int i, len = strlen(keys);
+
+      memset(&cur_state, 0, sizeof(mbstate_t));
+      for (i = 0; i <= len ;)
+	{
+	  size_t mbclen;
+	  mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
+	  if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+	    {
+	      /* An invalid sequence, or a truncated multibyte character.
+		 We treat it as a singlebyte character.  */
+	      mbclen = 1;
+	    }
+	  else
+	    {
+	      if (iswupper((wint_t)wc))
+		{
+		  wc = towlower((wint_t)wc);
+		  wcrtomb(keys + i, wc, &cur_state);
+		}
+	    }
+	  i += mbclen;
+	}
+    }
+#endif /* MBS_SUPPORT */
+
   (*compile)(keys, keycc);
 
   if ((argc - optind > 1 && !no_filenames) || with_filenames)
diff -urN grep-2.5.1a.orig/src/search.c grep-2.5.1a/src/search.c
--- grep-2.5.1a.orig/src/search.c	2001-04-19 09:42:14.000000000 +0600
+++ grep-2.5.1a/src/search.c	2005-10-23 09:51:25.000000000 +0600
@@ -18,9 +18,13 @@
 
 /* Written August 1992 by Mike Haertel. */
 
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
 #ifdef HAVE_CONFIG_H
 # include <config.h>
 #endif
+#include <assert.h>
 #include <sys/types.h>
 #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
 /* We can handle multibyte string.  */
@@ -31,7 +35,7 @@
 
 #include "system.h"
 #include "grep.h"
-#include "regex.h"
+#include <regex.h>
 #include "dfa.h"
 #include "kwset.h"
 #include "error.h"
@@ -39,6 +43,9 @@
 #ifdef HAVE_LIBPCRE
 # include <pcre.h>
 #endif
+#ifdef HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
 
 #define NCHAR (UCHAR_MAX + 1)
 
@@ -70,9 +77,10 @@
    call the regexp matcher at all. */
 static int kwset_exact_matches;
 
-#if defined(MBS_SUPPORT)
-static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
-#endif
+/* UTF-8 encoding allows some optimizations that we can't otherwise
+   assume in a multibyte encoding. */
+static int using_utf8;
+
 static void kwsinit PARAMS ((void));
 static void kwsmusts PARAMS ((void));
 static void Gcompile PARAMS ((char const *, size_t));
@@ -84,6 +92,15 @@
 static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
 
 void
+check_utf8 (void)
+{
+#ifdef HAVE_LANGINFO_CODESET
+  if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
+    using_utf8 = 1;
+#endif
+}
+
+void
 dfaerror (char const *mesg)
 {
   error (2, 0, mesg);
@@ -141,38 +158,6 @@
     }
 }
 
-#ifdef MBS_SUPPORT
-/* This function allocate the array which correspond to "buf".
-   Then this check multibyte string and mark on the positions which
-   are not singlebyte character nor the first byte of a multibyte
-   character.  Caller must free the array.  */
-static char*
-check_multibyte_string(char const *buf, size_t size)
-{
-  char *mb_properties = malloc(size);
-  mbstate_t cur_state;
-  int i;
-  memset(&cur_state, 0, sizeof(mbstate_t));
-  memset(mb_properties, 0, sizeof(char)*size);
-  for (i = 0; i < size ;)
-    {
-      size_t mbclen;
-      mbclen = mbrlen(buf + i, size - i, &cur_state);
-
-      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
-	{
-	  /* An invalid sequence, or a truncated multibyte character.
-	     We treat it as a singlebyte character.  */
-	  mbclen = 1;
-	}
-      mb_properties[i] = mbclen;
-      i += mbclen;
-    }
-
-  return mb_properties;
-}
-#endif
-
 static void
 Gcompile (char const *pattern, size_t size)
 {
@@ -181,7 +166,8 @@
   size_t total = size;
   char const *motif = pattern;
 
-  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
+  check_utf8 ();
+  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
   dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
 
   /* For GNU regex compiler we have to pass the patterns separately to detect
@@ -233,7 +219,7 @@
       static char const line_end[] = "\\)$";
       static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
       static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
-      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
+      char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
       size_t i;
       strcpy (n, match_lines ? line_beg : word_beg);
       i = strlen (n);
@@ -257,14 +243,15 @@
   size_t total = size;
   char const *motif = pattern;
 
+  check_utf8 ();
   if (strcmp (matcher, "awk") == 0)
     {
-      re_set_syntax (RE_SYNTAX_AWK);
+      re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
       dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
     }
   else
     {
-      re_set_syntax (RE_SYNTAX_POSIX_EGREP);
+      re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0));
       dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
     }
 
@@ -316,7 +303,7 @@
       static char const line_end[] = ")$";
       static char const word_beg[] = "(^|[^[:alnum:]_])(";
       static char const word_end[] = ")([^[:alnum:]_]|$)";
-      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
+      char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
       size_t i;
       strcpy (n, match_lines ? line_beg : word_beg);
       i = strlen(n);
@@ -339,15 +326,35 @@
   char eol = eolbyte;
   int backref, start, len;
   struct kwsmatch kwsm;
-  size_t i;
+  size_t i, ret_val;
+  static int use_dfa;
+  static int use_dfa_checked = 0;
 #ifdef MBS_SUPPORT
-  char *mb_properties = NULL;
+  const char *last_char = NULL;
+  int mb_cur_max = MB_CUR_MAX;
+  mbstate_t mbs;
+  memset (&mbs, '\0', sizeof (mbstate_t));
 #endif /* MBS_SUPPORT */
 
+  if (!use_dfa_checked)
+    {
+      char *grep_use_dfa = getenv ("GREP_USE_DFA");
+      if (!grep_use_dfa)
+	{
 #ifdef MBS_SUPPORT
-  if (MB_CUR_MAX > 1 && kwset)
-    mb_properties = check_multibyte_string(buf, size);
+	  /* Turn off DFA when processing multibyte input. */
+	  use_dfa = (MB_CUR_MAX == 1);
+#else
+	  use_dfa = 1;
 #endif /* MBS_SUPPORT */
+	}
+      else
+	{
+	  use_dfa = atoi (grep_use_dfa);
+	}
+
+      use_dfa_checked = 1;
+    }
 
   buflim = buf + size;
 
@@ -358,47 +365,124 @@
 	  if (kwset)
 	    {
 	      /* Find a possible match using the KWset matcher. */
-	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+#ifdef MBS_SUPPORT
+	      size_t bytes_left = 0;
+#endif /* MBS_SUPPORT */
+	      size_t offset;
+#ifdef MBS_SUPPORT
+	      /* kwsexec doesn't work with match_icase and multibyte input. */
+	      if (match_icase && mb_cur_max > 1)
+		/* Avoid kwset */
+		offset = 0;
+	      else
+#endif /* MBS_SUPPORT */
+	      offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
 	      if (offset == (size_t) -1)
-		{
+	        goto failure;
 #ifdef MBS_SUPPORT
-		  if (MB_CUR_MAX > 1)
-		    free(mb_properties);
-#endif
-		  return (size_t)-1;
+	      if (mb_cur_max > 1 && !using_utf8)
+		{
+		  bytes_left = offset;
+		  while (bytes_left)
+		    {
+		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
+
+		      last_char = beg;
+		      if (mlen == (size_t) -1 || mlen == 0)
+			{
+			  /* Incomplete character: treat as single-byte. */
+			  memset (&mbs, '\0', sizeof (mbstate_t));
+			  beg++;
+			  bytes_left--;
+			  continue;
+			}
+
+		      if (mlen == (size_t) -2)
+			/* Offset points inside multibyte character:
+			 * no good. */
+			break;
+
+		      beg += mlen;
+		      bytes_left -= mlen;
+		    }
 		}
+	      else
+#endif /* MBS_SUPPORT */
 	      beg += offset;
 	      /* Narrow down to the line containing the candidate, and
 		 run it through DFA. */
 	      end = memchr(beg, eol, buflim - beg);
 	      end++;
 #ifdef MBS_SUPPORT
-	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
+	      if (mb_cur_max > 1 && bytes_left)
 		continue;
-#endif
+#endif /* MBS_SUPPORT */
 	      while (beg > buf && beg[-1] != eol)
 		--beg;
-	      if (kwsm.index < kwset_exact_matches)
-		goto success;
-	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+	      if (
+#ifdef MBS_SUPPORT
+		  !(match_icase && mb_cur_max > 1) &&
+#endif /* MBS_SUPPORT */
+		  (kwsm.index < kwset_exact_matches))
+		goto success_in_beg_and_end;
+	      if (use_dfa &&
+		  dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
 		continue;
 	    }
 	  else
 	    {
 	      /* No good fixed strings; start with DFA. */
-	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+#ifdef MBS_SUPPORT
+	      size_t bytes_left = 0;
+#endif /* MBS_SUPPORT */
+	      size_t offset = 0;
+	      if (use_dfa)
+		offset = dfaexec (&dfa, beg, buflim - beg, &backref);
 	      if (offset == (size_t) -1)
 		break;
 	      /* Narrow down to the line we've found. */
+#ifdef MBS_SUPPORT
+	      if (mb_cur_max > 1 && !using_utf8)
+		{
+		  bytes_left = offset;
+		  while (bytes_left)
+		    {
+		      size_t mlen = mbrlen (beg, bytes_left, &mbs);
+
+		      last_char = beg;
+		      if (mlen == (size_t) -1 || mlen == 0)
+			{
+			  /* Incomplete character: treat as single-byte. */
+			  memset (&mbs, '\0', sizeof (mbstate_t));
+			  beg++;
+			  bytes_left--;
+			  continue;
+			}
+
+		      if (mlen == (size_t) -2)
+			/* Offset points inside multibyte character:
+			 * no good. */
+			break;
+
+		      beg += mlen;
+		      bytes_left -= mlen;
+		    }
+		}
+	      else
+#endif /* MBS_SUPPORT */
 	      beg += offset;
 	      end = memchr (beg, eol, buflim - beg);
 	      end++;
+#ifdef MBS_SUPPORT
+	      if (mb_cur_max > 1 && bytes_left)
+		continue;
+#endif /* MBS_SUPPORT */
 	      while (beg > buf && beg[-1] != eol)
 		--beg;
 	    }
 	  /* Successful, no backreferences encountered! */
-	  if (!backref)
-	    goto success;
+	  if (use_dfa && !backref)
+	    goto success_in_beg_and_end;
 	}
       else
 	end = beg + size;
@@ -413,14 +497,11 @@
 				       end - beg - 1, &(patterns[i].regs))))
 	    {
 	      len = patterns[i].regs.end[0] - start;
-	      if (exact)
-		{
-		  *match_size = len;
-		  return start;
-		}
+	      if (exact && !match_words)
+	        goto success_in_start_and_len;
 	      if ((!match_lines && !match_words)
 		  || (match_lines && len == end - beg - 1))
-		goto success;
+		goto success_in_beg_and_end;
 	      /* If -w, check if the match aligns with word boundaries.
 		 We do this iteratively because:
 		 (a) the line may contain more than one occurence of the
@@ -431,10 +512,84 @@
 	      if (match_words)
 		while (start >= 0)
 		  {
-		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
-			&& (len == end - beg - 1
-			    || !WCHAR ((unsigned char) beg[start + len])))
-		      goto success;
+		    int lword_match = 0;
+		    if (start == 0)
+		      lword_match = 1;
+		    else
+		      {
+			assert (start > 0);
+#ifdef MBS_SUPPORT
+			if (mb_cur_max > 1)
+			  {
+			    const char *s;
+			    int mr;
+			    wchar_t pwc;
+
+			    if (using_utf8)
+			      {
+				s = beg + start - 1;
+				while (s > buf
+				       && (unsigned char) *s >= 0x80
+				       && (unsigned char) *s <= 0xbf)
+				  --s;
+			      }
+			    else
+			      s = last_char;
+			    mr = mbtowc (&pwc, s, beg + start - s);
+			    if (mr <= 0)
+			      {
+				memset (&mbs, '\0', sizeof (mbstate_t));
+				lword_match = 1;
+			      }
+			    else if (!(iswalnum (pwc) || pwc == L'_')
+				     && mr == (int) (beg + start - s))
+			      lword_match = 1;
+			  }
+			else
+#endif /* MBS_SUPPORT */
+			if (!WCHAR ((unsigned char) beg[start - 1]))
+			  lword_match = 1;
+		      }
+
+		    if (lword_match)
+		      {
+			int rword_match = 0;
+			if (start + len == end - beg - 1)
+			  rword_match = 1;
+			else
+			  {
+#ifdef MBS_SUPPORT
+			    if (mb_cur_max > 1)
+			      {
+				wchar_t nwc;
+				int mr;
+
+				mr = mbtowc (&nwc, beg + start + len,
+					     end - beg - start - len - 1);
+				if (mr <= 0)
+				  {
+				    memset (&mbs, '\0', sizeof (mbstate_t));
+				    rword_match = 1;
+				  }
+				else if (!iswalnum (nwc) && nwc != L'_')
+				  rword_match = 1;
+			      }
+			    else
+#endif /* MBS_SUPPORT */
+			    if (!WCHAR ((unsigned char) beg[start + len]))
+			      rword_match = 1;
+			  }
+
+			if (rword_match)
+			  {
+			    if (!exact)
+			      /* Returns the whole line. */
+			      goto success_in_beg_and_end;
+			    else
+			      /* Returns just this word match. */
+			      goto success_in_start_and_len;
+			  }
+		      }
 		    if (len > 0)
 		      {
 			/* Try a shorter length anchored at the same place. */
@@ -461,26 +616,154 @@
 	    }
 	} /* for Regex patterns.  */
     } /* for (beg = end ..) */
-#ifdef MBS_SUPPORT
-  if (MB_CUR_MAX > 1 && mb_properties)
-    free (mb_properties);
-#endif /* MBS_SUPPORT */
+
+ failure:
   return (size_t) -1;
 
- success:
-#ifdef MBS_SUPPORT
-  if (MB_CUR_MAX > 1 && mb_properties)
-    free (mb_properties);
-#endif /* MBS_SUPPORT */
-  *match_size = end - beg;
-  return beg - buf;
+ success_in_beg_and_end:
+  len = end - beg;
+  start = beg - buf;
+  /* FALLTHROUGH */
+
+ success_in_start_and_len:
+  *match_size = len;
+  return start;
 }
 
+#ifdef MBS_SUPPORT
+static int f_i_multibyte; /* whether we're using the new -Fi MB method */
+static struct
+{
+  wchar_t **patterns;
+  size_t count, maxlen;
+  unsigned char *match;
+} Fimb;
+#endif
+
 static void
 Fcompile (char const *pattern, size_t size)
 {
+  int mb_cur_max = MB_CUR_MAX;
   char const *beg, *lim, *err;
 
+  check_utf8 ();
+#ifdef MBS_SUPPORT
+  /* Support -F -i for UTF-8 input. */
+  if (match_icase && mb_cur_max > 1)
+    {
+      mbstate_t mbs;
+      wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
+      const char *patternend = pattern;
+      size_t wcsize;
+      kwset_t fimb_kwset = NULL;
+      char *starts = NULL;
+      wchar_t *wcbeg, *wclim;
+      size_t allocated = 0;
+
+      memset (&mbs, '\0', sizeof (mbs));
+# ifdef __GNU_LIBRARY__
+      wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
+      if (patternend != pattern + size)
+	wcsize = (size_t) -1;
+# else
+      {
+	char *patterncopy = xmalloc (size + 1);
+
+	memcpy (patterncopy, pattern, size);
+	patterncopy[size] = '\0';
+	patternend = patterncopy;
+	wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
+	if (patternend != patterncopy + size)
+	  wcsize = (size_t) -1;
+	free (patterncopy);
+      }
+# endif
+      if (wcsize + 2 <= 2)
+	{
+fimb_fail:
+	  free (wcpattern);
+	  free (starts);
+	  if (fimb_kwset)
+	    kwsfree (fimb_kwset);
+	  free (Fimb.patterns);
+	  Fimb.patterns = NULL;
+	}
+      else
+	{
+	  if (!(fimb_kwset = kwsalloc (NULL)))
+	    error (2, 0, _("memory exhausted"));
+
+	  starts = xmalloc (mb_cur_max * 3);
+	  wcbeg = wcpattern;
+	  do
+	    {
+	      int i;
+	      size_t wclen;
+
+	      if (Fimb.count >= allocated)
+		{
+		  if (allocated == 0)
+		    allocated = 128;
+		  else
+		    allocated *= 2;
+		  Fimb.patterns = xrealloc (Fimb.patterns,
+					    sizeof (wchar_t *) * allocated);
+		}
+	      Fimb.patterns[Fimb.count++] = wcbeg;
+	      for (wclim = wcbeg;
+		   wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
+		*wclim = towlower (*wclim);
+	      *wclim = L'\0';
+	      wclen = wclim - wcbeg;
+	      if (wclen > Fimb.maxlen)
+		Fimb.maxlen = wclen;
+	      if (wclen > 3)
+		wclen = 3;
+	      if (wclen == 0)
+		{
+		  if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
+		    error (2, 0, err);
+		}
+	      else
+		for (i = 0; i < (1 << wclen); i++)
+		  {
+		    char *p = starts;
+		    int j, k;
+
+		    for (j = 0; j < wclen; ++j)
+		      {
+			wchar_t wc = wcbeg[j];
+			if (i & (1 << j))
+			  {
+			    wc = towupper (wc);
+			    if (wc == wcbeg[j])
+			      continue;
+			  }
+			k = wctomb (p, wc);
+			if (k <= 0)
+			  goto fimb_fail;
+			p += k;
+		      }
+		    if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
+		      error (2, 0, err);
+		  }
+	      if (wclim < wcpattern + wcsize)
+		++wclim;
+	      wcbeg = wclim;
+	    }
+	  while (wcbeg < wcpattern + wcsize);
+	  f_i_multibyte = 1;
+	  kwset = fimb_kwset;
+	  free (starts);
+	  Fimb.match = xmalloc (Fimb.count);
+	  if ((err = kwsprep (kwset)) != 0)
+	    error (2, 0, err);
+	  return;
+	}
+    }
+#endif /* MBS_SUPPORT */
+
+
   kwsinit ();
   beg = pattern;
   do
@@ -499,6 +782,76 @@
     error (2, 0, err);
 }
 
+#ifdef MBS_SUPPORT
+static int
+Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
+{
+  size_t len, letter, i;
+  int ret = -1;
+  mbstate_t mbs;
+  wchar_t wc;
+  int patterns_left;
+
+  assert (match_icase && f_i_multibyte == 1);
+  assert (MB_CUR_MAX > 1);
+
+  memset (&mbs, '\0', sizeof (mbs));
+  memset (Fimb.match, '\1', Fimb.count);
+  letter = len = 0;
+  patterns_left = 1;
+  while (patterns_left && len <= size)
+    {
+      size_t c;
+
+      patterns_left = 0;
+      if (len < size)
+	{
+	  c = mbrtowc (&wc, buf + len, size - len, &mbs);
+	  if (c + 2 <= 2)
+	    return ret;
+
+	  wc = towlower (wc);
+	}
+      else
+	{
+	  c = 1;
+	  wc = L'\0';
+	}
+
+      for (i = 0; i < Fimb.count; i++)
+	{
+	  if (Fimb.match[i])
+	    {
+	      if (Fimb.patterns[i][letter] == L'\0')
+		{
+		  /* Found a match. */
+		  *plen = len;
+		  if (!exact && !match_words)
+		    return 0;
+		  else
+		    {
+		      /* For -w or exact look for longest match.  */
+		      ret = 0;
+		      Fimb.match[i] = '\0';
+		      continue;
+		    }
+		}
+
+	      if (Fimb.patterns[i][letter] == wc)
+		patterns_left = 1;
+	      else
+		Fimb.match[i] = '\0';
+	    }
+	}
+
+      len += c;
+      letter++;
+    }
+
+  return ret;
+}
+#endif /* MBS_SUPPORT */
+
 static size_t
 Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
 {
@@ -506,88 +859,268 @@
   register size_t len;
   char eol = eolbyte;
   struct kwsmatch kwsmatch;
+  size_t ret_val;
 #ifdef MBS_SUPPORT
-  char *mb_properties;
-  if (MB_CUR_MAX > 1)
-    mb_properties = check_multibyte_string (buf, size);
+  int mb_cur_max = MB_CUR_MAX;
+  mbstate_t mbs;
+  memset (&mbs, '\0', sizeof (mbstate_t));
+  const char *last_char = NULL;
 #endif /* MBS_SUPPORT */
 
   for (beg = buf; beg <= buf + size; ++beg)
     {
-      size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
+      size_t offset;
+      offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
+
       if (offset == (size_t) -1)
-	{
+	goto failure;
 #ifdef MBS_SUPPORT
-	  if (MB_CUR_MAX > 1)
-	    free(mb_properties);
-#endif /* MBS_SUPPORT */
-	  return offset;
+      if (mb_cur_max > 1 && !using_utf8)
+	{
+	  size_t bytes_left = offset;
+	  while (bytes_left)
+	    {
+	      size_t mlen = mbrlen (beg, bytes_left, &mbs);
+
+	      last_char = beg;
+	      if (mlen == (size_t) -1 || mlen == 0)
+		{
+		  /* Incomplete character: treat as single-byte. */
+		  memset (&mbs, '\0', sizeof (mbstate_t));
+		  beg++;
+		  bytes_left--;
+		  continue;
+		}
+
+	      if (mlen == (size_t) -2)
+		/* Offset points inside multibyte character: no good. */
+		break;
+
+	      beg += mlen;
+	      bytes_left -= mlen;
+	    }
+
+	  if (bytes_left)
+	    continue;
 	}
-#ifdef MBS_SUPPORT
-      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
-	continue; /* It is a part of multibyte character.  */
+      else
 #endif /* MBS_SUPPORT */
       beg += offset;
-      len = kwsmatch.size[0];
-      if (exact)
-	{
-	  *match_size = len;
 #ifdef MBS_SUPPORT
-	  if (MB_CUR_MAX > 1)
-	    free (mb_properties);
+      /* For f_i_multibyte, the string at beg now matches first 3 chars of
+	 one of the search strings (less if there are shorter search strings).
+	 See if this is a real match.  */
+      if (f_i_multibyte
+	  && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
+	goto next_char;
 #endif /* MBS_SUPPORT */
-	  return beg - buf;
-	}
+      len = kwsmatch.size[0];
+      if (exact && !match_words)
+	goto success_in_beg_and_len;
       if (match_lines)
 	{
 	  if (beg > buf && beg[-1] != eol)
-	    continue;
+	    goto next_char;
 	  if (beg + len < buf + size && beg[len] != eol)
-	    continue;
+	    goto next_char;
 	  goto success;
 	}
       else if (match_words)
-	for (try = beg; len; )
-	  {
-	    if (try > buf && WCHAR((unsigned char) try[-1]))
-	      break;
-	    if (try + len < buf + size && WCHAR((unsigned char) try[len]))
-	      {
-		offset = kwsexec (kwset, beg, --len, &kwsmatch);
-		if (offset == (size_t) -1)
-		  {
+	{
+	  while (len)
+	    {
+	      int word_match = 0;
+	      if (beg > buf)
+		{
 #ifdef MBS_SUPPORT
-		    if (MB_CUR_MAX > 1)
-		      free (mb_properties);
+		  if (mb_cur_max > 1)
+		    {
+		      const char *s;
+		      int mr;
+		      wchar_t pwc;
+
+		      if (using_utf8)
+			{
+			  s = beg - 1;
+			  while (s > buf
+				 && (unsigned char) *s >= 0x80
+				 && (unsigned char) *s <= 0xbf)
+			    --s;
+			}
+		      else
+			s = last_char;
+		      mr = mbtowc (&pwc, s, beg - s);
+		      if (mr <= 0)
+			memset (&mbs, '\0', sizeof (mbstate_t));
+		      else if ((iswalnum (pwc) || pwc == L'_')
+			       && mr == (int) (beg - s))
+			goto next_char;
+		    }
+		  else
 #endif /* MBS_SUPPORT */
-		    return offset;
-		  }
-		try = beg + offset;
-		len = kwsmatch.size[0];
-	      }
-	    else
-	      goto success;
-	  }
+		  if (WCHAR ((unsigned char) beg[-1]))
+		    goto next_char;
+		}
+#ifdef MBS_SUPPORT
+	      if (mb_cur_max > 1)
+		{
+		  wchar_t nwc;
+		  int mr;
+
+		  mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
+		  if (mr <= 0)
+		    {
+		      memset (&mbs, '\0', sizeof (mbstate_t));
+		      word_match = 1;
+		    }
+		  else if (!iswalnum (nwc) && nwc != L'_')
+		    word_match = 1;
+		}
+	      else
+#endif /* MBS_SUPPORT */
+		if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
+		  word_match = 1;
+	      if (word_match)
+		{
+		  if (!exact)
+		    /* Returns the whole line now we know there's a word match. */
+		    goto success;
+		  else
+		    /* Returns just this word match. */
+		    goto success_in_beg_and_len;
+		}
+	      if (len > 0)
+		{
+		  /* Try a shorter length anchored at the same place. */
+		  --len;
+		  offset = kwsexec (kwset, beg, len, &kwsmatch);
+
+		  if (offset == -1)
+		    goto next_char; /* Try a different anchor. */
+#ifdef MBS_SUPPORT
+		  if (mb_cur_max > 1 && !using_utf8)
+		    {
+		      size_t bytes_left = offset;
+		      while (bytes_left)
+			{
+			  size_t mlen = mbrlen (beg, bytes_left, &mbs);
+
+			  last_char = beg;
+			  if (mlen == (size_t) -1 || mlen == 0)
+			    {
+			      /* Incomplete character: treat as single-byte. */
+			      memset (&mbs, '\0', sizeof (mbstate_t));
+			      beg++;
+			      bytes_left--;
+			      continue;
+			    }
+
+			  if (mlen == (size_t) -2)
+			    {
+			      /* Offset points inside multibyte character:
+			       * no good. */
+			      break;
+			    }
+
+			  beg += mlen;
+			  bytes_left -= mlen;
+			}
+
+		      if (bytes_left)
+			{
+			  memset (&mbs, '\0', sizeof (mbstate_t));
+			  goto next_char; /* Try a different anchor. */
+			}
+		    }
+		  else
+#endif /* MBS_SUPPORT */
+		  beg += offset;
+#ifdef MBS_SUPPORT
+		  /* The string at beg now matches first 3 chars of one of
+		     the search strings (less if there are shorter search
+		     strings).  See if this is a real match.  */
+		  if (f_i_multibyte
+		      && Fimbexec (beg, len - offset, &kwsmatch.size[0],
+				   exact))
+		    goto next_char;
+#endif /* MBS_SUPPORT */
+		  len = kwsmatch.size[0];
+		}
+	    }
+	}
       else
 	goto success;
-    }
-
+next_char:;
 #ifdef MBS_SUPPORT
-  if (MB_CUR_MAX > 1)
-    free (mb_properties);
+      /* Advance to next character.  For MB_CUR_MAX == 1 case this is handled
+	 by ++beg above.  */
+      if (mb_cur_max > 1)
+	{
+	  if (using_utf8)
+	    {
+	      unsigned char c = *beg;
+	      if (c >= 0xc2)
+		{
+		  if (c < 0xe0)
+		    ++beg;
+		  else if (c < 0xf0)
+		    beg += 2;
+		  else if (c < 0xf8)
+		    beg += 3;
+		  else if (c < 0xfc)
+		    beg += 4;
+		  else if (c < 0xfe)
+		    beg += 5;
+		}
+	    }
+	  else
+	    {
+	      size_t l = mbrlen (beg, buf + size - beg, &mbs);
+
+	      last_char = beg;
+	      if (l + 2 >= 2)
+		beg += l - 1;
+	      else
+		memset (&mbs, '\0', sizeof (mbstate_t));
+	    }
+	}
 #endif /* MBS_SUPPORT */
+    }
+
+ failure:
   return -1;
 
  success:
+#ifdef MBS_SUPPORT
+  if (mb_cur_max > 1 && !using_utf8)
+    {
+      end = beg + len;
+      while (end < buf + size)
+	{
+	  size_t mlen = mbrlen (end, buf + size - end, &mbs);
+	  if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
+	    {
+	      memset (&mbs, '\0', sizeof (mbstate_t));
+	      mlen = 1;
+	    }
+	  if (mlen == 1 && *end == eol)
+	    break;
+
+	  end += mlen;
+	}
+    }
+  else
+#endif /* MBS_SUPPORT */
   end = memchr (beg + len, eol, (buf + size) - (beg + len));
+
   end++;
   while (buf < beg && beg[-1] != eol)
     --beg;
-  *match_size = end - beg;
-#ifdef MBS_SUPPORT
-  if (MB_CUR_MAX > 1)
-    free (mb_properties);
-#endif /* MBS_SUPPORT */
+  len = end - beg;
+  /* FALLTHROUGH */
+
+ success_in_beg_and_len:
+  *match_size = len;
   return beg - buf;
 }
 
diff -urN grep-2.5.1a.orig/src/search.c.orig grep-2.5.1a/src/search.c.orig
--- grep-2.5.1a.orig/src/search.c.orig	1970-01-01 05:00:00.000000000 +0500
+++ grep-2.5.1a/src/search.c.orig	2005-10-23 09:48:39.000000000 +0600
@@ -0,0 +1,714 @@
+/* search.c - searching subroutines using dfa, kwset and regex for grep.
+   Copyright 1992, 1998, 2000 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+   02111-1307, USA.  */
+
+/* Written August 1992 by Mike Haertel. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <sys/types.h>
+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
+/* We can handle multibyte string.  */
+# define MBS_SUPPORT
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
+#include "system.h"
+#include "grep.h"
+#include "regex.h"
+#include "dfa.h"
+#include "kwset.h"
+#include "error.h"
+#include "xalloc.h"
+#ifdef HAVE_LIBPCRE
+# include <pcre.h>
+#endif
+
+#define NCHAR (UCHAR_MAX + 1)
+
+/* For -w, we also consider _ to be word constituent.  */
+#define WCHAR(C) (ISALNUM(C) || (C) == '_')
+
+/* DFA compiled regexp. */
+static struct dfa dfa;
+
+/* The Regex compiled patterns.  */
+static struct patterns
+{
+  /* Regex compiled regexp. */
+  struct re_pattern_buffer regexbuf;
+  struct re_registers regs; /* This is here on account of a BRAIN-DEAD
+			       Q@#%!# library interface in regex.c.  */
+} patterns0;
+
+struct patterns *patterns;
+size_t pcount;
+
+/* KWset compiled pattern.  For Ecompile and Gcompile, we compile
+   a list of strings, at least one of which is known to occur in
+   any string matching the regexp. */
+static kwset_t kwset;
+
+/* Number of compiled fixed strings known to exactly match the regexp.
+   If kwsexec returns < kwset_exact_matches, then we don't need to
+   call the regexp matcher at all. */
+static int kwset_exact_matches;
+
+#if defined(MBS_SUPPORT)
+static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+#endif
+static void kwsinit PARAMS ((void));
+static void kwsmusts PARAMS ((void));
+static void Gcompile PARAMS ((char const *, size_t));
+static void Ecompile PARAMS ((char const *, size_t));
+static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
+static void Fcompile PARAMS ((char const *, size_t));
+static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
+static void Pcompile PARAMS ((char const *, size_t ));
+static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
+
+void
+dfaerror (char const *mesg)
+{
+  error (2, 0, mesg);
+}
+
+static void
+kwsinit (void)
+{
+  static char trans[NCHAR];
+  int i;
+
+  if (match_icase)
+    for (i = 0; i < NCHAR; ++i)
+      trans[i] = TOLOWER (i);
+
+  if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
+    error (2, 0, _("memory exhausted"));
+}
+
+/* If the DFA turns out to have some set of fixed strings one of
+   which must occur in the match, then we build a kwset matcher
+   to find those strings, and thus quickly filter out impossible
+   matches. */
+static void
+kwsmusts (void)
+{
+  struct dfamust const *dm;
+  char const *err;
+
+  if (dfa.musts)
+    {
+      kwsinit ();
+      /* First, we compile in the substrings known to be exact
+	 matches.  The kwset matcher will return the index
+	 of the matching string that it chooses. */
+      for (dm = dfa.musts; dm; dm = dm->next)
+	{
+	  if (!dm->exact)
+	    continue;
+	  ++kwset_exact_matches;
+	  if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
+	    error (2, 0, err);
+	}
+      /* Now, we compile the substrings that will require
+	 the use of the regexp matcher.  */
+      for (dm = dfa.musts; dm; dm = dm->next)
+	{
+	  if (dm->exact)
+	    continue;
+	  if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
+	    error (2, 0, err);
+	}
+      if ((err = kwsprep (kwset)) != 0)
+	error (2, 0, err);
+    }
+}
+
+#ifdef MBS_SUPPORT
+/* This function allocate the array which correspond to "buf".
+   Then this check multibyte string and mark on the positions which
+   are not singlebyte character nor the first byte of a multibyte
+   character.  Caller must free the array.  */
+static char*
+check_multibyte_string(char const *buf, size_t size)
+{
+  char *mb_properties = malloc(size);
+  mbstate_t cur_state;
+  int i;
+  memset(&cur_state, 0, sizeof(mbstate_t));
+  memset(mb_properties, 0, sizeof(char)*size);
+  for (i = 0; i < size ;)
+    {
+      size_t mbclen;
+      mbclen = mbrlen(buf + i, size - i, &cur_state);
+
+      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+	{
+	  /* An invalid sequence, or a truncated multibyte character.
+	     We treat it as a singlebyte character.  */
+	  mbclen = 1;
+	}
+      mb_properties[i] = mbclen;
+      i += mbclen;
+    }
+
+  return mb_properties;
+}
+#endif
+
+static void
+Gcompile (char const *pattern, size_t size)
+{
+  const char *err;
+  char const *sep;
+  size_t total = size;
+  char const *motif = pattern;
+
+  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
+  dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
+
+  /* For GNU regex compiler we have to pass the patterns separately to detect
+     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
+     GNU regex should have raise a syntax error.  The same for backref, where
+     the backref should have been local to each pattern.  */
+  do
+    {
+      size_t len;
+      sep = memchr (motif, '\n', total);
+      if (sep)
+	{
+	  len = sep - motif;
+	  sep++;
+	  total -= (len + 1);
+	}
+      else
+	{
+	  len = total;
+	  total = 0;
+	}
+
+      patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
+      if (patterns == NULL)
+	error (2, errno, _("memory exhausted"));
+
+      patterns[pcount] = patterns0;
+
+      if ((err = re_compile_pattern (motif, len,
+				    &(patterns[pcount].regexbuf))) != 0)
+	error (2, 0, err);
+      pcount++;
+
+      motif = sep;
+    } while (sep && total != 0);
+
+  /* In the match_words and match_lines cases, we use a different pattern
+     for the DFA matcher that will quickly throw out cases that won't work.
+     Then if DFA succeeds we do some hairy stuff using the regex matcher
+     to decide whether the match should really count. */
+  if (match_words || match_lines)
+    {
+      /* In the whole-word case, we use the pattern:
+	 \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]|$\).
+	 In the whole-line case, we use the pattern:
+	 ^\(userpattern\)$.  */
+
+      static char const line_beg[] = "^\\(";
+      static char const line_end[] = "\\)$";
+      static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
+      static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
+      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
+      size_t i;
+      strcpy (n, match_lines ? line_beg : word_beg);
+      i = strlen (n);
+      memcpy (n + i, pattern, size);
+      i += size;
+      strcpy (n + i, match_lines ? line_end : word_end);
+      i += strlen (n + i);
+      pattern = n;
+      size = i;
+    }
+
+  dfacomp (pattern, size, &dfa, 1);
+  kwsmusts ();
+}
+
+static void
+Ecompile (char const *pattern, size_t size)
+{
+  const char *err;
+  const char *sep;
+  size_t total = size;
+  char const *motif = pattern;
+
+  if (strcmp (matcher, "awk") == 0)
+    {
+      re_set_syntax (RE_SYNTAX_AWK);
+      dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
+    }
+  else
+    {
+      re_set_syntax (RE_SYNTAX_POSIX_EGREP);
+      dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
+    }
+
+  /* For GNU regex compiler we have to pass the patterns separately to detect
+     errors like "[\nallo\n]\n".  The patterns here are "[", "allo" and "]"
+     GNU regex should have raise a syntax error.  The same for backref, where
+     the backref should have been local to each pattern.  */
+  do
+    {
+      size_t len;
+      sep = memchr (motif, '\n', total);
+      if (sep)
+	{
+	  len = sep - motif;
+	  sep++;
+	  total -= (len + 1);
+	}
+      else
+	{
+	  len = total;
+	  total = 0;
+	}
+
+      patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
+      if (patterns == NULL)
+	error (2, errno, _("memory exhausted"));
+      patterns[pcount] = patterns0;
+
+      if ((err = re_compile_pattern (motif, len,
+				    &(patterns[pcount].regexbuf))) != 0)
+	error (2, 0, err);
+      pcount++;
+
+      motif = sep;
+    } while (sep && total != 0);
+
+  /* In the match_words and match_lines cases, we use a different pattern
+     for the DFA matcher that will quickly throw out cases that won't work.
+     Then if DFA succeeds we do some hairy stuff using the regex matcher
+     to decide whether the match should really count. */
+  if (match_words || match_lines)
+    {
+      /* In the whole-word case, we use the pattern:
+	 (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
+	 In the whole-line case, we use the pattern:
+	 ^(userpattern)$.  */
+
+      static char const line_beg[] = "^(";
+      static char const line_end[] = ")$";
+      static char const word_beg[] = "(^|[^[:alnum:]_])(";
+      static char const word_end[] = ")([^[:alnum:]_]|$)";
+      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
+      size_t i;
+      strcpy (n, match_lines ? line_beg : word_beg);
+      i = strlen(n);
+      memcpy (n + i, pattern, size);
+      i += size;
+      strcpy (n + i, match_lines ? line_end : word_end);
+      i += strlen (n + i);
+      pattern = n;
+      size = i;
+    }
+
+  dfacomp (pattern, size, &dfa, 1);
+  kwsmusts ();
+}
+
+static size_t
+EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
+{
+  register char const *buflim, *beg, *end;
+  char eol = eolbyte;
+  int backref, start, len;
+  struct kwsmatch kwsm;
+  size_t i;
+#ifdef MBS_SUPPORT
+  char *mb_properties = NULL;
+#endif /* MBS_SUPPORT */
+
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1 && kwset)
+    mb_properties = check_multibyte_string(buf, size);
+#endif /* MBS_SUPPORT */
+
+  buflim = buf + size;
+
+  for (beg = end = buf; end < buflim; beg = end)
+    {
+      if (!exact)
+	{
+	  if (kwset)
+	    {
+	      /* Find a possible match using the KWset matcher. */
+	      size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
+	      if (offset == (size_t) -1)
+	        goto failure;
+	      beg += offset;
+	      /* Narrow down to the line containing the candidate, and
+		 run it through DFA. */
+	      end = memchr(beg, eol, buflim - beg);
+	      end++;
+#ifdef MBS_SUPPORT
+	      if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
+		continue;
+#endif
+	      while (beg > buf && beg[-1] != eol)
+		--beg;
+	      if (kwsm.index < kwset_exact_matches)
+		goto success_in_beg_and_end;
+	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+		continue;
+	    }
+	  else
+	    {
+	      /* No good fixed strings; start with DFA. */
+	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+	      if (offset == (size_t) -1)
+		break;
+	      /* Narrow down to the line we've found. */
+	      beg += offset;
+	      end = memchr (beg, eol, buflim - beg);
+	      end++;
+	      while (beg > buf && beg[-1] != eol)
+		--beg;
+	    }
+	  /* Successful, no backreferences encountered! */
+	  if (!backref)
+	    goto success_in_beg_and_end;
+	}
+      else
+	end = beg + size;
+
+      /* If we've made it to this point, this means DFA has seen
+	 a probable match, and we need to run it through Regex. */
+      for (i = 0; i < pcount; i++)
+	{
+	  patterns[i].regexbuf.not_eol = 0;
+	  if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
+				       end - beg - 1, 0,
+				       end - beg - 1, &(patterns[i].regs))))
+	    {
+	      len = patterns[i].regs.end[0] - start;
+	      if (exact && !match_words)
+	        goto success_in_start_and_len;
+	      if ((!match_lines && !match_words)
+		  || (match_lines && len == end - beg - 1))
+		goto success_in_beg_and_end;
+	      /* If -w, check if the match aligns with word boundaries.
+		 We do this iteratively because:
+		 (a) the line may contain more than one occurence of the
+		 pattern, and
+		 (b) Several alternatives in the pattern might be valid at a
+		 given point, and we may need to consider a shorter one to
+		 find a word boundary.  */
+	      if (match_words)
+		while (start >= 0)
+		  {
+		    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
+			&& (len == end - beg - 1
+			    || !WCHAR ((unsigned char) beg[start + len])))
+		      goto success_in_beg_and_end;
+		    if (len > 0)
+		      {
+			/* Try a shorter length anchored at the same place. */
+			--len;
+			patterns[i].regexbuf.not_eol = 1;
+			len = re_match (&(patterns[i].regexbuf), beg,
+					start + len, start,
+					&(patterns[i].regs));
+		      }
+		    if (len <= 0)
+		      {
+			/* Try looking further on. */
+			if (start == end - beg - 1)
+			  break;
+			++start;
+			patterns[i].regexbuf.not_eol = 0;
+			start = re_search (&(patterns[i].regexbuf), beg,
+					   end - beg - 1,
+					   start, end - beg - 1 - start,
+					   &(patterns[i].regs));
+			len = patterns[i].regs.end[0] - start;
+		      }
+		  }
+	    }
+	} /* for Regex patterns.  */
+    } /* for (beg = end ..) */
+
+ failure:
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1 && mb_properties)
+    free (mb_properties);
+#endif /* MBS_SUPPORT */
+  return (size_t) -1;
+
+ success_in_beg_and_end:
+  len = end - beg;
+  start = beg - buf;
+  /* FALLTHROUGH */
+
+ success_in_start_and_len:
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1 && mb_properties)
+    free (mb_properties);
+#endif /* MBS_SUPPORT */
+  *match_size = len;
+  return start;
+}
+
+static void
+Fcompile (char const *pattern, size_t size)
+{
+  char const *beg, *lim, *err;
+
+  kwsinit ();
+  beg = pattern;
+  do
+    {
+      for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
+	;
+      if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
+	error (2, 0, err);
+      if (lim < pattern + size)
+	++lim;
+      beg = lim;
+    }
+  while (beg < pattern + size);
+
+  if ((err = kwsprep (kwset)) != 0)
+    error (2, 0, err);
+}
+
+static size_t
+Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
+{
+  register char const *beg, *try, *end;
+  register size_t len;
+  char eol = eolbyte;
+  struct kwsmatch kwsmatch;
+#ifdef MBS_SUPPORT
+  char *mb_properties;
+  if (MB_CUR_MAX > 1)
+    mb_properties = check_multibyte_string (buf, size);
+#endif /* MBS_SUPPORT */
+
+  for (beg = buf; beg <= buf + size; ++beg)
+    {
+      size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
+      if (offset == (size_t) -1)
+	goto failure;
+#ifdef MBS_SUPPORT
+      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
+	continue; /* It is a part of multibyte character.  */
+#endif /* MBS_SUPPORT */
+      beg += offset;
+      len = kwsmatch.size[0];
+      if (exact && !match_words)
+	goto success_in_beg_and_len;
+      if (match_lines)
+	{
+	  if (beg > buf && beg[-1] != eol)
+	    continue;
+	  if (beg + len < buf + size && beg[len] != eol)
+	    continue;
+	  goto success;
+	}
+      else if (match_words)
+	for (try = beg; len; )
+	  {
+	    if (try > buf && WCHAR((unsigned char) try[-1]))
+	      break;
+	    if (try + len < buf + size && WCHAR((unsigned char) try[len]))
+	      {
+		offset = kwsexec (kwset, beg, --len, &kwsmatch);
+		if (offset == (size_t) -1)
+		  {
+#ifdef MBS_SUPPORT
+		    if (MB_CUR_MAX > 1)
+		      free (mb_properties);
+#endif /* MBS_SUPPORT */
+		    return offset;
+		  }
+		try = beg + offset;
+		len = kwsmatch.size[0];
+	      }
+	    else
+	      goto success;
+	  }
+      else
+	goto success;
+    }
+
+ failure:
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1)
+    free (mb_properties);
+#endif /* MBS_SUPPORT */
+  return -1;
+
+ success:
+  end = memchr (beg + len, eol, (buf + size) - (beg + len));
+  end++;
+  while (buf < beg && beg[-1] != eol)
+    --beg;
+  len = end - beg;
+  /* FALLTHROUGH */
+
+ success_in_beg_and_len:
+  *match_size = len;
+#ifdef MBS_SUPPORT
+  if (MB_CUR_MAX > 1)
+    free (mb_properties);
+#endif /* MBS_SUPPORT */
+  return beg - buf;
+}
+
+#if HAVE_LIBPCRE
+/* Compiled internal form of a Perl regular expression.  */
+static pcre *cre;
+
+/* Additional information about the pattern.  */
+static pcre_extra *extra;
+#endif
+
+static void
+Pcompile (char const *pattern, size_t size)
+{
+#if !HAVE_LIBPCRE
+  error (2, 0, _("The -P option is not supported"));
+#else
+  int e;
+  char const *ep;
+  char *re = xmalloc (4 * size + 7);
+  int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
+  char const *patlim = pattern + size;
+  char *n = re;
+  char const *p;
+  char const *pnul;
+
+  /* FIXME: Remove this restriction.  */
+  if (eolbyte != '\n')
+    error (2, 0, _("The -P and -z options cannot be combined"));
+
+  *n = '\0';
+  if (match_lines)
+    strcpy (n, "^(");
+  if (match_words)
+    strcpy (n, "\\b(");
+  n += strlen (n);
+
+  /* The PCRE interface doesn't allow NUL bytes in the pattern, so
+     replace each NUL byte in the pattern with the four characters
+     "\000", removing a preceding backslash if there are an odd
+     number of backslashes before the NUL.
+
+     FIXME: This method does not work with some multibyte character
+     encodings, notably Shift-JIS, where a multibyte character can end
+     in a backslash byte.  */
+  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+    {
+      memcpy (n, p, pnul - p);
+      n += pnul - p;
+      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
+	continue;
+      n -= (pnul - p) & 1;
+      strcpy (n, "\\000");
+      n += 4;
+    }
+
+  memcpy (n, p, patlim - p);
+  n += patlim - p;
+  *n = '\0';
+  if (match_words)
+    strcpy (n, ")\\b");
+  if (match_lines)
+    strcpy (n, ")$");
+
+  cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
+  if (!cre)
+    error (2, 0, ep);
+
+  extra = pcre_study (cre, 0, &ep);
+  if (ep)
+    error (2, 0, ep);
+
+  free (re);
+#endif
+}
+
+static size_t
+Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
+{
+#if !HAVE_LIBPCRE
+  abort ();
+  return -1;
+#else
+  /* This array must have at least two elements; everything after that
+     is just for performance improvement in pcre_exec.  */
+  int sub[300];
+
+  int e = pcre_exec (cre, extra, buf, size, 0, 0,
+		     sub, sizeof sub / sizeof *sub);
+
+  if (e <= 0)
+    {
+      switch (e)
+	{
+	case PCRE_ERROR_NOMATCH:
+	  return -1;
+
+	case PCRE_ERROR_NOMEMORY:
+	  error (2, 0, _("Memory exhausted"));
+
+	default:
+	  abort ();
+	}
+    }
+  else
+    {
+      /* Narrow down to the line we've found.  */
+      char const *beg = buf + sub[0];
+      char const *end = buf + sub[1];
+      char const *buflim = buf + size;
+      char eol = eolbyte;
+      if (!exact)
+	{
+	  end = memchr (end, eol, buflim - end);
+	  end++;
+	  while (buf < beg && beg[-1] != eol)
+	    --beg;
+	}
+
+      *match_size = end - beg;
+      return beg - buf;
+    }
+#endif
+}
+
+struct matcher const matchers[] = {
+  { "default", Gcompile, EGexecute },
+  { "grep", Gcompile, EGexecute },
+  { "egrep", Ecompile, EGexecute },
+  { "awk", Ecompile, EGexecute },
+  { "fgrep", Fcompile, Fexecute },
+  { "perl", Pcompile, Pexecute },
+  { "", 0, 0 },
+};
diff -urN grep-2.5.1a.orig/tests/fmbtest.sh grep-2.5.1a/tests/fmbtest.sh
--- grep-2.5.1a.orig/tests/fmbtest.sh	1970-01-01 05:00:00.000000000 +0500
+++ grep-2.5.1a/tests/fmbtest.sh	2005-10-23 09:51:12.000000000 +0600
@@ -0,0 +1,111 @@
+#!/bin/sh
+
+: ${srcdir=.}
+
+# If cs_CZ.UTF-8 locale doesn't work, skip this test silently
+LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
+  || exit 77
+
+failures=0
+
+cat > csinput <<EOF
+01 Žluťoučká číše
+ČíŠE 02
+03 Z číší Čiší cosi
+04 Čí
+Še 05
+06 ČČČČČČČíšČÍŠčíš
+07 ČČČ ČČČČíšČÍŠčíšEEEE
+čAs 08
+09Čapka
+10ČaSy se měnÍ
+ČÍšE11
+Čas12
+𝇕ČÍšE𝇓13
+ŽČÍšE𝇓14
+𝇕ČÍšEŽ15
+ŽČÍšEŽ16
+ČÍšE𝇓17
+ČÍšEŽ18
+19𝇕ČÍše
+20ŽČÍše
+EOF
+cat > cspatfile <<EOF
+ČÍšE
+Čas
+EOF
+
+for mode in F G E; do
+
+test1="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode} -f cspatfile csinput \
+	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
+if test "$test1" != "11 12 13 14 15 16 17 18"; then
+  echo "Test #1 ${mode} failed: $test1"
+  failures=1
+fi
+
+test2="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -f cspatfile csinput \
+	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
+if test "$test2" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
+  echo "Test #2 ${mode} failed: $test2"
+  failures=1
+fi
+
+test3="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'ČÍšE' -e 'Čas' csinput \
+	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
+if test "$test3" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
+  echo "Test #3 ${mode} failed: $test3"
+  failures=1
+fi
+
+test4="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}iw -f cspatfile csinput \
+	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
+if test "$test4" != "01 02 08 13 17 19"; then
+  echo "Test #4 ${mode} failed: $test4"
+  failures=1
+fi
+
+done
+
+# Test that -F --color=always prefers longer matches.
+test5="`echo 'Cosi tu ČišÍ...' \
+	| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -Fi -e 'čiš' -e 'čiší'`"
+if echo "$test5" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
+  :
+else
+  echo "Test #5 F failed: $test5"
+  failures=1
+fi
+
+for mode in G E; do
+
+# Test that -{G,E} --color=always prefers earlier pattern matches.
+test6="`echo 'Cosi tu ČišÍ...' \
+	| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiš' -e 'čiší'`"
+if echo "$test6" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČiš.*\[.*m\(.\[K\)\?Í\.\.\.'; then
+  :
+else
+  echo "Test #6 ${mode} failed: $test6"
+  failures=1
+fi
+
+# Test that -{G,E} --color=always prefers earlier pattern matches.
+test7="`echo 'Cosi tu ČišÍ...' \
+	| LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiší' -e 'čiš'`"
+if echo "$test7" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
+  :
+else
+  echo "Test #7 ${mode} failed: $test7"
+  failures=1
+fi
+
+test8="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'Č.šE' -e 'Č[a-f]s' csinput \
+	       | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
+if test "$test8" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
+  echo "Test #8 ${mode} failed: $test8"
+  failures=1
+fi
+
+done
+
+exit $failures
diff -urN grep-2.5.1a.orig/tests/Makefile.am grep-2.5.1a/tests/Makefile.am
--- grep-2.5.1a.orig/tests/Makefile.am	2001-03-07 09:11:27.000000000 +0500
+++ grep-2.5.1a/tests/Makefile.am	2005-10-23 09:51:12.000000000 +0600
@@ -3,7 +3,8 @@
 AWK=@AWK@
 
 TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
-        status.sh empty.sh options.sh backref.sh file.sh
+        status.sh empty.sh options.sh backref.sh file.sh \
+        fmbtest.sh
 EXTRA_DIST = $(TESTS) \
              khadafy.lines khadafy.regexp \
              spencer1.awk spencer1.tests \
diff -urN grep-2.5.1a.orig/tests/Makefile.in grep-2.5.1a/tests/Makefile.in
--- grep-2.5.1a.orig/tests/Makefile.in	2002-03-26 21:09:36.000000000 +0500
+++ grep-2.5.1a/tests/Makefile.in	2005-10-23 09:51:13.000000000 +0600
@@ -97,7 +97,8 @@
 AWK = @AWK@
 
 TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
-        status.sh empty.sh options.sh backref.sh file.sh
+        status.sh empty.sh options.sh backref.sh file.sh \
+	fmbtest.sh
 
 EXTRA_DIST = $(TESTS) \
              khadafy.lines khadafy.regexp \