1 Submitted by: Alexander E. Patrakov
3 Initial Package Version: 2.5.1a
4 Upstream Status: Partially accepted, partially rejected, but required for LSB >= 2.0 certification
6 Description: Various fixes from Red Hat. Individual patches:
9 grep-2.5.1-bracket.patch
12 grep-2.5.1-manpage.patch
13 grep-2.5.1-color.patch
14 grep-2.5.1-icolor.patch
15 grep-2.5.1-egf-speedup.patch
16 grep-2.5.1-dfa-optional.patch
17 grep-2.5.1-tests.patch
22 -fgrep: purpose unknown, but required for the other patches to apply
23 -bracket: echo "[" | LANG=en_US.UTF-8 grep "[[:space:]]"
24 -i18n: many fixes for multibyte locale support, required for LSB.
25 -oi: echo xxYYzz | LANG=C grep -i -o yy
27 -color: restore the background color correctly
28 -icolor: purpose unclear; possible test case: echo 'spam foo SPAM FOO' | grep -i --color spam
29 (but that case is also fixed by -oi. Is this patch just a cleanup?)
30 -egf-speedup: without this, grep is as slow as a snail in UTF-8 locales.
31 -dfa-optional: disables dfa in multibyte locales by default.
32 -w: (echo 'foo';echo 'fo') > /tmp/testfile && grep -F -w fo /tmp/testfile
34 diff -urN grep-2.5.1a.orig/doc/grep.1 grep-2.5.1a/doc/grep.1
35 --- grep-2.5.1a.orig/doc/grep.1 2004-11-12 16:26:37.000000000 +0500
36 +++ grep-2.5.1a/doc/grep.1 2005-10-23 09:49:43.000000000 +0600
39 as a list of fixed strings, separated by newlines,
40 any of which is to be matched.
42 .BR \-P ", " \-\^\-perl-regexp
46 This is especially useful for tools like zgrep, e.g.
47 .B "gzip -cd foo.gz |grep --label=foo something"
49 -.BR \-\^\-line-buffering
50 +.BR \-\^\-line-buffered
51 Use line buffering, it can be a performance penality.
53 .BR \-q ", " \-\^\-quiet ", " \-\^\-silent
54 diff -urN grep-2.5.1a.orig/lib/posix/regex.h grep-2.5.1a/lib/posix/regex.h
55 --- grep-2.5.1a.orig/lib/posix/regex.h 2001-04-02 23:56:50.000000000 +0600
56 +++ grep-2.5.1a/lib/posix/regex.h 2005-10-23 09:49:31.000000000 +0600
58 If not set, \{, \}, {, and } are literals. */
59 #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
61 +/* If this bit is set, then ignore case when matching.
62 + If not set, then case is significant. */
63 +#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
65 /* If this bit is set, +, ? and | aren't recognized as operators.
66 If not set, they are. */
67 #define RE_LIMITED_OPS (RE_INTERVALS << 1)
68 diff -urN grep-2.5.1a.orig/src/dfa.c grep-2.5.1a/src/dfa.c
69 --- grep-2.5.1a.orig/src/dfa.c 2001-09-26 22:57:55.000000000 +0600
70 +++ grep-2.5.1a/src/dfa.c 2005-10-23 09:49:17.000000000 +0600
73 /* This function fetch a wide character, and update cur_mb_len,
74 used only if the current locale is a multibyte environment. */
77 fetch_wc (char const *eoferr)
88 cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
91 parse_bracket_exp_mb ()
93 - wchar_t wc, wc1, wc2;
94 + wint_t wc, wc1, wc2;
96 /* Work area to build a mb_char_classes. */
97 struct mb_char_classes *work_mbc;
102 - wc1 = -1; /* mark wc1 is not initialized". */
103 + wc1 = WEOF; /* mark wc1 is not initialized". */
105 /* Note that if we're looking at some other [:...:] construct,
106 we just treat it as a bunch of ordinary characters. We can do
108 work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
115 /* We treat '[' as a normal character here. */
117 wc = fetch_wc(("Unbalanced ["));
122 wc1 = fetch_wc(_("Unbalanced ["));
125 @@ -630,17 +630,17 @@
127 REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
128 range_sts_al, work_mbc->nranges + 1);
129 - work_mbc->range_sts[work_mbc->nranges] = wc;
130 + work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
131 REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
132 range_ends_al, work_mbc->nranges + 1);
133 - work_mbc->range_ends[work_mbc->nranges++] = wc2;
134 + work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
137 + else if (wc != WEOF)
138 /* build normal characters. */
140 REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
141 work_mbc->nchars + 1);
142 - work_mbc->chars[work_mbc->nchars++] = wc;
143 + work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
146 while ((wc = wc1) != L']');
147 @@ -2552,6 +2552,8 @@
150 /* match with a character? */
152 + wc = towlower (wc);
153 for (i = 0; i<work_mbc->nchars; i++)
155 if (wc == work_mbc->chars[i])
156 diff -urN grep-2.5.1a.orig/src/grep.c grep-2.5.1a/src/grep.c
157 --- grep-2.5.1a.orig/src/grep.c 2004-11-12 16:25:35.000000000 +0500
158 +++ grep-2.5.1a/src/grep.c 2005-10-23 09:50:06.000000000 +0600
160 # include <sys/time.h>
161 # include <sys/resource.h>
163 +#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
164 +/* We can handle multibyte string. */
165 +# define MBS_SUPPORT
167 +# include <wctype.h>
178 - /* Yuck, this is tricky */
179 - char *buf = (char*) xmalloc (lim - beg);
181 - char *ilim = ibeg + (lim - beg);
183 - for (i = 0; i < lim - beg; i++)
184 - ibeg[i] = tolower (beg[i]);
185 - while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
188 - char const *b = beg + match_offset;
191 - fwrite (beg, sizeof (char), match_offset, stdout);
192 - printf ("\33[%sm", grep_color);
193 - fwrite (b, sizeof (char), match_size, stdout);
194 - fputs ("\33[00m", stdout);
195 - beg = b + match_size;
196 - ibeg = ibeg + match_offset + match_size;
198 - fwrite (beg, 1, lim - beg, stdout);
203 while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
207 fputs ("\33[00m", stdout);
208 beg = b + match_size;
210 + fputs ("\33[K", stdout);
212 fwrite (beg, 1, lim - beg, stdout);
214 @@ -1697,6 +1677,37 @@
215 if (!install_matcher (matcher) && !install_matcher ("default"))
219 + if (MB_CUR_MAX != 1 && match_icase)
222 + mbstate_t cur_state, prev_state;
223 + int i, len = strlen(keys);
225 + memset(&cur_state, 0, sizeof(mbstate_t));
226 + for (i = 0; i <= len ;)
229 + mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
230 + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
232 + /* An invalid sequence, or a truncated multibyte character.
233 + We treat it as a singlebyte character. */
238 + if (iswupper((wint_t)wc))
240 + wc = towlower((wint_t)wc);
241 + wcrtomb(keys + i, wc, &cur_state);
247 +#endif /* MBS_SUPPORT */
249 (*compile)(keys, keycc);
251 if ((argc - optind > 1 && !no_filenames) || with_filenames)
252 diff -urN grep-2.5.1a.orig/src/search.c grep-2.5.1a/src/search.c
253 --- grep-2.5.1a.orig/src/search.c 2001-04-19 09:42:14.000000000 +0600
254 +++ grep-2.5.1a/src/search.c 2005-10-23 09:51:25.000000000 +0600
257 /* Written August 1992 by Mike Haertel. */
260 +# define _GNU_SOURCE 1
266 #include <sys/types.h>
267 #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
268 /* We can handle multibyte string. */
282 +#ifdef HAVE_LANGINFO_CODESET
283 +# include <langinfo.h>
286 #define NCHAR (UCHAR_MAX + 1)
289 call the regexp matcher at all. */
290 static int kwset_exact_matches;
292 -#if defined(MBS_SUPPORT)
293 -static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
295 +/* UTF-8 encoding allows some optimizations that we can't otherwise
296 + assume in a multibyte encoding. */
297 +static int using_utf8;
299 static void kwsinit PARAMS ((void));
300 static void kwsmusts PARAMS ((void));
301 static void Gcompile PARAMS ((char const *, size_t));
303 static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
308 +#ifdef HAVE_LANGINFO_CODESET
309 + if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
315 dfaerror (char const *mesg)
323 -/* This function allocate the array which correspond to "buf".
324 - Then this check multibyte string and mark on the positions which
325 - are not singlebyte character nor the first byte of a multibyte
326 - character. Caller must free the array. */
328 -check_multibyte_string(char const *buf, size_t size)
330 - char *mb_properties = malloc(size);
331 - mbstate_t cur_state;
333 - memset(&cur_state, 0, sizeof(mbstate_t));
334 - memset(mb_properties, 0, sizeof(char)*size);
335 - for (i = 0; i < size ;)
338 - mbclen = mbrlen(buf + i, size - i, &cur_state);
340 - if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
342 - /* An invalid sequence, or a truncated multibyte character.
343 - We treat it as a singlebyte character. */
346 - mb_properties[i] = mbclen;
350 - return mb_properties;
355 Gcompile (char const *pattern, size_t size)
359 char const *motif = pattern;
361 - re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
363 + re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
364 dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
366 /* For GNU regex compiler we have to pass the patterns separately to detect
368 static char const line_end[] = "\\)$";
369 static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
370 static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
371 - char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
372 + char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
374 strcpy (n, match_lines ? line_beg : word_beg);
376 @@ -257,14 +243,15 @@
378 char const *motif = pattern;
381 if (strcmp (matcher, "awk") == 0)
383 - re_set_syntax (RE_SYNTAX_AWK);
384 + re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
385 dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
389 - re_set_syntax (RE_SYNTAX_POSIX_EGREP);
390 + re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0));
391 dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
395 static char const line_end[] = ")$";
396 static char const word_beg[] = "(^|[^[:alnum:]_])(";
397 static char const word_end[] = ")([^[:alnum:]_]|$)";
398 - char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
399 + char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
401 strcpy (n, match_lines ? line_beg : word_beg);
403 @@ -339,15 +326,35 @@
405 int backref, start, len;
406 struct kwsmatch kwsm;
409 + static int use_dfa;
410 + static int use_dfa_checked = 0;
412 - char *mb_properties = NULL;
413 + const char *last_char = NULL;
414 + int mb_cur_max = MB_CUR_MAX;
416 + memset (&mbs, '\0', sizeof (mbstate_t));
417 #endif /* MBS_SUPPORT */
419 + if (!use_dfa_checked)
421 + char *grep_use_dfa = getenv ("GREP_USE_DFA");
425 - if (MB_CUR_MAX > 1 && kwset)
426 - mb_properties = check_multibyte_string(buf, size);
427 + /* Turn off DFA when processing multibyte input. */
428 + use_dfa = (MB_CUR_MAX == 1);
431 #endif /* MBS_SUPPORT */
435 + use_dfa = atoi (grep_use_dfa);
438 + use_dfa_checked = 1;
443 @@ -358,47 +365,124 @@
446 /* Find a possible match using the KWset matcher. */
447 - size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
449 + size_t bytes_left = 0;
450 +#endif /* MBS_SUPPORT */
453 + /* kwsexec doesn't work with match_icase and multibyte input. */
454 + if (match_icase && mb_cur_max > 1)
458 +#endif /* MBS_SUPPORT */
459 + offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
460 if (offset == (size_t) -1)
464 - if (MB_CUR_MAX > 1)
465 - free(mb_properties);
468 + if (mb_cur_max > 1 && !using_utf8)
470 + bytes_left = offset;
473 + size_t mlen = mbrlen (beg, bytes_left, &mbs);
476 + if (mlen == (size_t) -1 || mlen == 0)
478 + /* Incomplete character: treat as single-byte. */
479 + memset (&mbs, '\0', sizeof (mbstate_t));
485 + if (mlen == (size_t) -2)
486 + /* Offset points inside multibyte character:
491 + bytes_left -= mlen;
495 +#endif /* MBS_SUPPORT */
497 /* Narrow down to the line containing the candidate, and
498 run it through DFA. */
499 end = memchr(beg, eol, buflim - beg);
502 - if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
503 + if (mb_cur_max > 1 && bytes_left)
506 +#endif /* MBS_SUPPORT */
507 while (beg > buf && beg[-1] != eol)
509 - if (kwsm.index < kwset_exact_matches)
511 - if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
514 + !(match_icase && mb_cur_max > 1) &&
515 +#endif /* MBS_SUPPORT */
516 + (kwsm.index < kwset_exact_matches))
517 + goto success_in_beg_and_end;
519 + dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
524 /* No good fixed strings; start with DFA. */
525 - size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
527 + size_t bytes_left = 0;
528 +#endif /* MBS_SUPPORT */
531 + offset = dfaexec (&dfa, beg, buflim - beg, &backref);
532 if (offset == (size_t) -1)
534 /* Narrow down to the line we've found. */
536 + if (mb_cur_max > 1 && !using_utf8)
538 + bytes_left = offset;
541 + size_t mlen = mbrlen (beg, bytes_left, &mbs);
544 + if (mlen == (size_t) -1 || mlen == 0)
546 + /* Incomplete character: treat as single-byte. */
547 + memset (&mbs, '\0', sizeof (mbstate_t));
553 + if (mlen == (size_t) -2)
554 + /* Offset points inside multibyte character:
559 + bytes_left -= mlen;
563 +#endif /* MBS_SUPPORT */
565 end = memchr (beg, eol, buflim - beg);
568 + if (mb_cur_max > 1 && bytes_left)
570 +#endif /* MBS_SUPPORT */
571 while (beg > buf && beg[-1] != eol)
574 /* Successful, no backreferences encountered! */
577 + if (use_dfa && !backref)
578 + goto success_in_beg_and_end;
582 @@ -413,14 +497,11 @@
583 end - beg - 1, &(patterns[i].regs))))
585 len = patterns[i].regs.end[0] - start;
591 + if (exact && !match_words)
592 + goto success_in_start_and_len;
593 if ((!match_lines && !match_words)
594 || (match_lines && len == end - beg - 1))
596 + goto success_in_beg_and_end;
597 /* If -w, check if the match aligns with word boundaries.
598 We do this iteratively because:
599 (a) the line may contain more than one occurence of the
600 @@ -431,10 +512,84 @@
604 - if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
605 - && (len == end - beg - 1
606 - || !WCHAR ((unsigned char) beg[start + len])))
608 + int lword_match = 0;
613 + assert (start > 0);
615 + if (mb_cur_max > 1)
623 + s = beg + start - 1;
625 + && (unsigned char) *s >= 0x80
626 + && (unsigned char) *s <= 0xbf)
631 + mr = mbtowc (&pwc, s, beg + start - s);
634 + memset (&mbs, '\0', sizeof (mbstate_t));
637 + else if (!(iswalnum (pwc) || pwc == L'_')
638 + && mr == (int) (beg + start - s))
642 +#endif /* MBS_SUPPORT */
643 + if (!WCHAR ((unsigned char) beg[start - 1]))
649 + int rword_match = 0;
650 + if (start + len == end - beg - 1)
655 + if (mb_cur_max > 1)
660 + mr = mbtowc (&nwc, beg + start + len,
661 + end - beg - start - len - 1);
664 + memset (&mbs, '\0', sizeof (mbstate_t));
667 + else if (!iswalnum (nwc) && nwc != L'_')
671 +#endif /* MBS_SUPPORT */
672 + if (!WCHAR ((unsigned char) beg[start + len]))
679 + /* Returns the whole line. */
680 + goto success_in_beg_and_end;
682 + /* Returns just this word match. */
683 + goto success_in_start_and_len;
688 /* Try a shorter length anchored at the same place. */
689 @@ -461,26 +616,154 @@
691 } /* for Regex patterns. */
692 } /* for (beg = end ..) */
694 - if (MB_CUR_MAX > 1 && mb_properties)
695 - free (mb_properties);
696 -#endif /* MBS_SUPPORT */
703 - if (MB_CUR_MAX > 1 && mb_properties)
704 - free (mb_properties);
705 -#endif /* MBS_SUPPORT */
706 - *match_size = end - beg;
708 + success_in_beg_and_end:
713 + success_in_start_and_len:
719 +static int f_i_multibyte; /* whether we're using the new -Fi MB method */
722 + wchar_t **patterns;
723 + size_t count, maxlen;
724 + unsigned char *match;
729 Fcompile (char const *pattern, size_t size)
731 + int mb_cur_max = MB_CUR_MAX;
732 char const *beg, *lim, *err;
736 + /* Support -F -i for UTF-8 input. */
737 + if (match_icase && mb_cur_max > 1)
740 + wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
741 + const char *patternend = pattern;
743 + kwset_t fimb_kwset = NULL;
744 + char *starts = NULL;
745 + wchar_t *wcbeg, *wclim;
746 + size_t allocated = 0;
748 + memset (&mbs, '\0', sizeof (mbs));
749 +# ifdef __GNU_LIBRARY__
750 + wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
751 + if (patternend != pattern + size)
752 + wcsize = (size_t) -1;
755 + char *patterncopy = xmalloc (size + 1);
757 + memcpy (patterncopy, pattern, size);
758 + patterncopy[size] = '\0';
759 + patternend = patterncopy;
760 + wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
761 + if (patternend != patterncopy + size)
762 + wcsize = (size_t) -1;
763 + free (patterncopy);
766 + if (wcsize + 2 <= 2)
772 + kwsfree (fimb_kwset);
773 + free (Fimb.patterns);
774 + Fimb.patterns = NULL;
778 + if (!(fimb_kwset = kwsalloc (NULL)))
779 + error (2, 0, _("memory exhausted"));
781 + starts = xmalloc (mb_cur_max * 3);
788 + if (Fimb.count >= allocated)
790 + if (allocated == 0)
794 + Fimb.patterns = xrealloc (Fimb.patterns,
795 + sizeof (wchar_t *) * allocated);
797 + Fimb.patterns[Fimb.count++] = wcbeg;
798 + for (wclim = wcbeg;
799 + wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
800 + *wclim = towlower (*wclim);
802 + wclen = wclim - wcbeg;
803 + if (wclen > Fimb.maxlen)
804 + Fimb.maxlen = wclen;
809 + if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
813 + for (i = 0; i < (1 << wclen); i++)
818 + for (j = 0; j < wclen; ++j)
820 + wchar_t wc = wcbeg[j];
823 + wc = towupper (wc);
824 + if (wc == wcbeg[j])
827 + k = wctomb (p, wc);
832 + if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
835 + if (wclim < wcpattern + wcsize)
839 + while (wcbeg < wcpattern + wcsize);
841 + kwset = fimb_kwset;
843 + Fimb.match = xmalloc (Fimb.count);
844 + if ((err = kwsprep (kwset)) != 0)
849 +#endif /* MBS_SUPPORT */
861 +Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
863 + size_t len, letter, i;
869 + assert (match_icase && f_i_multibyte == 1);
870 + assert (MB_CUR_MAX > 1);
872 + memset (&mbs, '\0', sizeof (mbs));
873 + memset (Fimb.match, '\1', Fimb.count);
876 + while (patterns_left && len <= size)
883 + c = mbrtowc (&wc, buf + len, size - len, &mbs);
887 + wc = towlower (wc);
895 + for (i = 0; i < Fimb.count; i++)
899 + if (Fimb.patterns[i][letter] == L'\0')
901 + /* Found a match. */
903 + if (!exact && !match_words)
907 + /* For -w or exact look for longest match. */
909 + Fimb.match[i] = '\0';
914 + if (Fimb.patterns[i][letter] == wc)
917 + Fimb.match[i] = '\0';
927 +#endif /* MBS_SUPPORT */
930 Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
932 @@ -506,88 +859,268 @@
935 struct kwsmatch kwsmatch;
938 - char *mb_properties;
939 - if (MB_CUR_MAX > 1)
940 - mb_properties = check_multibyte_string (buf, size);
941 + int mb_cur_max = MB_CUR_MAX;
943 + memset (&mbs, '\0', sizeof (mbstate_t));
944 + const char *last_char = NULL;
945 #endif /* MBS_SUPPORT */
947 for (beg = buf; beg <= buf + size; ++beg)
949 - size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
951 + offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
953 if (offset == (size_t) -1)
957 - if (MB_CUR_MAX > 1)
958 - free(mb_properties);
959 -#endif /* MBS_SUPPORT */
961 + if (mb_cur_max > 1 && !using_utf8)
963 + size_t bytes_left = offset;
966 + size_t mlen = mbrlen (beg, bytes_left, &mbs);
969 + if (mlen == (size_t) -1 || mlen == 0)
971 + /* Incomplete character: treat as single-byte. */
972 + memset (&mbs, '\0', sizeof (mbstate_t));
978 + if (mlen == (size_t) -2)
979 + /* Offset points inside multibyte character: no good. */
983 + bytes_left -= mlen;
990 - if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
991 - continue; /* It is a part of multibyte character. */
993 #endif /* MBS_SUPPORT */
995 - len = kwsmatch.size[0];
1000 - if (MB_CUR_MAX > 1)
1001 - free (mb_properties);
1002 + /* For f_i_multibyte, the string at beg now matches first 3 chars of
1003 + one of the search strings (less if there are shorter search strings).
1004 + See if this is a real match. */
1006 + && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
1008 #endif /* MBS_SUPPORT */
1011 + len = kwsmatch.size[0];
1012 + if (exact && !match_words)
1013 + goto success_in_beg_and_len;
1016 if (beg > buf && beg[-1] != eol)
1019 if (beg + len < buf + size && beg[len] != eol)
1024 else if (match_words)
1025 - for (try = beg; len; )
1027 - if (try > buf && WCHAR((unsigned char) try[-1]))
1029 - if (try + len < buf + size && WCHAR((unsigned char) try[len]))
1031 - offset = kwsexec (kwset, beg, --len, &kwsmatch);
1032 - if (offset == (size_t) -1)
1037 + int word_match = 0;
1041 - if (MB_CUR_MAX > 1)
1042 - free (mb_properties);
1043 + if (mb_cur_max > 1)
1053 + && (unsigned char) *s >= 0x80
1054 + && (unsigned char) *s <= 0xbf)
1059 + mr = mbtowc (&pwc, s, beg - s);
1061 + memset (&mbs, '\0', sizeof (mbstate_t));
1062 + else if ((iswalnum (pwc) || pwc == L'_')
1063 + && mr == (int) (beg - s))
1067 #endif /* MBS_SUPPORT */
1070 - try = beg + offset;
1071 - len = kwsmatch.size[0];
1076 + if (WCHAR ((unsigned char) beg[-1]))
1080 + if (mb_cur_max > 1)
1085 + mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
1088 + memset (&mbs, '\0', sizeof (mbstate_t));
1091 + else if (!iswalnum (nwc) && nwc != L'_')
1095 +#endif /* MBS_SUPPORT */
1096 + if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
1101 + /* Returns the whole line now we know there's a word match. */
1104 + /* Returns just this word match. */
1105 + goto success_in_beg_and_len;
1109 + /* Try a shorter length anchored at the same place. */
1111 + offset = kwsexec (kwset, beg, len, &kwsmatch);
1114 + goto next_char; /* Try a different anchor. */
1116 + if (mb_cur_max > 1 && !using_utf8)
1118 + size_t bytes_left = offset;
1119 + while (bytes_left)
1121 + size_t mlen = mbrlen (beg, bytes_left, &mbs);
1124 + if (mlen == (size_t) -1 || mlen == 0)
1126 + /* Incomplete character: treat as single-byte. */
1127 + memset (&mbs, '\0', sizeof (mbstate_t));
1133 + if (mlen == (size_t) -2)
1135 + /* Offset points inside multibyte character:
1141 + bytes_left -= mlen;
1146 + memset (&mbs, '\0', sizeof (mbstate_t));
1147 + goto next_char; /* Try a different anchor. */
1151 +#endif /* MBS_SUPPORT */
1154 + /* The string at beg now matches first 3 chars of one of
1155 + the search strings (less if there are shorter search
1156 + strings). See if this is a real match. */
1158 + && Fimbexec (beg, len - offset, &kwsmatch.size[0],
1161 +#endif /* MBS_SUPPORT */
1162 + len = kwsmatch.size[0];
1172 - if (MB_CUR_MAX > 1)
1173 - free (mb_properties);
1174 + /* Advance to next character. For MB_CUR_MAX == 1 case this is handled
1175 + by ++beg above. */
1176 + if (mb_cur_max > 1)
1180 + unsigned char c = *beg;
1185 + else if (c < 0xf0)
1187 + else if (c < 0xf8)
1189 + else if (c < 0xfc)
1191 + else if (c < 0xfe)
1197 + size_t l = mbrlen (beg, buf + size - beg, &mbs);
1203 + memset (&mbs, '\0', sizeof (mbstate_t));
1206 #endif /* MBS_SUPPORT */
1214 + if (mb_cur_max > 1 && !using_utf8)
1217 + while (end < buf + size)
1219 + size_t mlen = mbrlen (end, buf + size - end, &mbs);
1220 + if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
1222 + memset (&mbs, '\0', sizeof (mbstate_t));
1225 + if (mlen == 1 && *end == eol)
1232 +#endif /* MBS_SUPPORT */
1233 end = memchr (beg + len, eol, (buf + size) - (beg + len));
1236 while (buf < beg && beg[-1] != eol)
1238 - *match_size = end - beg;
1240 - if (MB_CUR_MAX > 1)
1241 - free (mb_properties);
1242 -#endif /* MBS_SUPPORT */
1246 + success_in_beg_and_len:
1247 + *match_size = len;
1251 diff -urN grep-2.5.1a.orig/src/search.c.orig grep-2.5.1a/src/search.c.orig
1252 --- grep-2.5.1a.orig/src/search.c.orig 1970-01-01 05:00:00.000000000 +0500
1253 +++ grep-2.5.1a/src/search.c.orig 2005-10-23 09:48:39.000000000 +0600
1255 +/* search.c - searching subroutines using dfa, kwset and regex for grep.
1256 + Copyright 1992, 1998, 2000 Free Software Foundation, Inc.
1258 + This program is free software; you can redistribute it and/or modify
1259 + it under the terms of the GNU General Public License as published by
1260 + the Free Software Foundation; either version 2, or (at your option)
1261 + any later version.
1263 + This program is distributed in the hope that it will be useful,
1264 + but WITHOUT ANY WARRANTY; without even the implied warranty of
1265 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1266 + GNU General Public License for more details.
1268 + You should have received a copy of the GNU General Public License
1269 + along with this program; if not, write to the Free Software
1270 + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
1271 + 02111-1307, USA. */
1273 +/* Written August 1992 by Mike Haertel. */
1275 +#ifdef HAVE_CONFIG_H
1276 +# include <config.h>
1278 +#include <sys/types.h>
1279 +#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
1280 +/* We can handle multibyte string. */
1281 +# define MBS_SUPPORT
1282 +# include <wchar.h>
1283 +# include <wctype.h>
1286 +#include "system.h"
1292 +#include "xalloc.h"
1293 +#ifdef HAVE_LIBPCRE
1297 +#define NCHAR (UCHAR_MAX + 1)
1299 +/* For -w, we also consider _ to be word constituent. */
1300 +#define WCHAR(C) (ISALNUM(C) || (C) == '_')
1302 +/* DFA compiled regexp. */
1303 +static struct dfa dfa;
1305 +/* The Regex compiled patterns. */
1306 +static struct patterns
1308 + /* Regex compiled regexp. */
1309 + struct re_pattern_buffer regexbuf;
1310 + struct re_registers regs; /* This is here on account of a BRAIN-DEAD
1311 + Q@#%!# library interface in regex.c. */
1314 +struct patterns *patterns;
1317 +/* KWset compiled pattern. For Ecompile and Gcompile, we compile
1318 + a list of strings, at least one of which is known to occur in
1319 + any string matching the regexp. */
1320 +static kwset_t kwset;
1322 +/* Number of compiled fixed strings known to exactly match the regexp.
1323 + If kwsexec returns < kwset_exact_matches, then we don't need to
1324 + call the regexp matcher at all. */
1325 +static int kwset_exact_matches;
1327 +#if defined(MBS_SUPPORT)
1328 +static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
1330 +static void kwsinit PARAMS ((void));
1331 +static void kwsmusts PARAMS ((void));
1332 +static void Gcompile PARAMS ((char const *, size_t));
1333 +static void Ecompile PARAMS ((char const *, size_t));
1334 +static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
1335 +static void Fcompile PARAMS ((char const *, size_t));
1336 +static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
1337 +static void Pcompile PARAMS ((char const *, size_t ));
1338 +static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
1341 +dfaerror (char const *mesg)
1343 + error (2, 0, mesg);
1349 + static char trans[NCHAR];
1353 + for (i = 0; i < NCHAR; ++i)
1354 + trans[i] = TOLOWER (i);
1356 + if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
1357 + error (2, 0, _("memory exhausted"));
1360 +/* If the DFA turns out to have some set of fixed strings one of
1361 + which must occur in the match, then we build a kwset matcher
1362 + to find those strings, and thus quickly filter out impossible
1367 + struct dfamust const *dm;
1373 + /* First, we compile in the substrings known to be exact
1374 + matches. The kwset matcher will return the index
1375 + of the matching string that it chooses. */
1376 + for (dm = dfa.musts; dm; dm = dm->next)
1380 + ++kwset_exact_matches;
1381 + if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
1382 + error (2, 0, err);
1384 + /* Now, we compile the substrings that will require
1385 + the use of the regexp matcher. */
1386 + for (dm = dfa.musts; dm; dm = dm->next)
1390 + if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
1391 + error (2, 0, err);
1393 + if ((err = kwsprep (kwset)) != 0)
1394 + error (2, 0, err);
1399 +/* This function allocate the array which correspond to "buf".
1400 + Then this check multibyte string and mark on the positions which
1401 + are not singlebyte character nor the first byte of a multibyte
1402 + character. Caller must free the array. */
1404 +check_multibyte_string(char const *buf, size_t size)
1406 + char *mb_properties = malloc(size);
1407 + mbstate_t cur_state;
1409 + memset(&cur_state, 0, sizeof(mbstate_t));
1410 + memset(mb_properties, 0, sizeof(char)*size);
1411 + for (i = 0; i < size ;)
1414 + mbclen = mbrlen(buf + i, size - i, &cur_state);
1416 + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
1418 + /* An invalid sequence, or a truncated multibyte character.
1419 + We treat it as a singlebyte character. */
1422 + mb_properties[i] = mbclen;
1426 + return mb_properties;
1431 +Gcompile (char const *pattern, size_t size)
1435 + size_t total = size;
1436 + char const *motif = pattern;
1438 + re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
1439 + dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
1441 + /* For GNU regex compiler we have to pass the patterns separately to detect
1442 + errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]"
1443 + GNU regex should have raise a syntax error. The same for backref, where
1444 + the backref should have been local to each pattern. */
1448 + sep = memchr (motif, '\n', total);
1451 + len = sep - motif;
1453 + total -= (len + 1);
1461 + patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
1462 + if (patterns == NULL)
1463 + error (2, errno, _("memory exhausted"));
1465 + patterns[pcount] = patterns0;
1467 + if ((err = re_compile_pattern (motif, len,
1468 + &(patterns[pcount].regexbuf))) != 0)
1469 + error (2, 0, err);
1473 + } while (sep && total != 0);
1475 + /* In the match_words and match_lines cases, we use a different pattern
1476 + for the DFA matcher that will quickly throw out cases that won't work.
1477 + Then if DFA succeeds we do some hairy stuff using the regex matcher
1478 + to decide whether the match should really count. */
1479 + if (match_words || match_lines)
1481 + /* In the whole-word case, we use the pattern:
1482 + \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]|$\).
1483 + In the whole-line case, we use the pattern:
1484 + ^\(userpattern\)$. */
1486 + static char const line_beg[] = "^\\(";
1487 + static char const line_end[] = "\\)$";
1488 + static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
1489 + static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
1490 + char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
1492 + strcpy (n, match_lines ? line_beg : word_beg);
1494 + memcpy (n + i, pattern, size);
1496 + strcpy (n + i, match_lines ? line_end : word_end);
1497 + i += strlen (n + i);
1502 + dfacomp (pattern, size, &dfa, 1);
1507 +Ecompile (char const *pattern, size_t size)
1511 + size_t total = size;
1512 + char const *motif = pattern;
1514 + if (strcmp (matcher, "awk") == 0)
1516 + re_set_syntax (RE_SYNTAX_AWK);
1517 + dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
1521 + re_set_syntax (RE_SYNTAX_POSIX_EGREP);
1522 + dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
1525 + /* For GNU regex compiler we have to pass the patterns separately to detect
1526 + errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]"
1527 + GNU regex should have raise a syntax error. The same for backref, where
1528 + the backref should have been local to each pattern. */
1532 + sep = memchr (motif, '\n', total);
1535 + len = sep - motif;
1537 + total -= (len + 1);
1545 + patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
1546 + if (patterns == NULL)
1547 + error (2, errno, _("memory exhausted"));
1548 + patterns[pcount] = patterns0;
1550 + if ((err = re_compile_pattern (motif, len,
1551 + &(patterns[pcount].regexbuf))) != 0)
1552 + error (2, 0, err);
1556 + } while (sep && total != 0);
1558 + /* In the match_words and match_lines cases, we use a different pattern
1559 + for the DFA matcher that will quickly throw out cases that won't work.
1560 + Then if DFA succeeds we do some hairy stuff using the regex matcher
1561 + to decide whether the match should really count. */
1562 + if (match_words || match_lines)
1564 + /* In the whole-word case, we use the pattern:
1565 + (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
1566 + In the whole-line case, we use the pattern:
1567 + ^(userpattern)$. */
1569 + static char const line_beg[] = "^(";
1570 + static char const line_end[] = ")$";
1571 + static char const word_beg[] = "(^|[^[:alnum:]_])(";
1572 + static char const word_end[] = ")([^[:alnum:]_]|$)";
1573 + char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
1575 + strcpy (n, match_lines ? line_beg : word_beg);
1577 + memcpy (n + i, pattern, size);
1579 + strcpy (n + i, match_lines ? line_end : word_end);
1580 + i += strlen (n + i);
1585 + dfacomp (pattern, size, &dfa, 1);
1590 +EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
1592 + register char const *buflim, *beg, *end;
1593 + char eol = eolbyte;
1594 + int backref, start, len;
1595 + struct kwsmatch kwsm;
1598 + char *mb_properties = NULL;
1599 +#endif /* MBS_SUPPORT */
1602 + if (MB_CUR_MAX > 1 && kwset)
1603 + mb_properties = check_multibyte_string(buf, size);
1604 +#endif /* MBS_SUPPORT */
1606 + buflim = buf + size;
1608 + for (beg = end = buf; end < buflim; beg = end)
1614 + /* Find a possible match using the KWset matcher. */
1615 + size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
1616 + if (offset == (size_t) -1)
1619 + /* Narrow down to the line containing the candidate, and
1620 + run it through DFA. */
1621 + end = memchr(beg, eol, buflim - beg);
1624 + if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
1627 + while (beg > buf && beg[-1] != eol)
1629 + if (kwsm.index < kwset_exact_matches)
1630 + goto success_in_beg_and_end;
1631 + if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
1636 + /* No good fixed strings; start with DFA. */
1637 + size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
1638 + if (offset == (size_t) -1)
1640 + /* Narrow down to the line we've found. */
1642 + end = memchr (beg, eol, buflim - beg);
1644 + while (beg > buf && beg[-1] != eol)
1647 + /* Successful, no backreferences encountered! */
1649 + goto success_in_beg_and_end;
1654 + /* If we've made it to this point, this means DFA has seen
1655 + a probable match, and we need to run it through Regex. */
1656 + for (i = 0; i < pcount; i++)
1658 + patterns[i].regexbuf.not_eol = 0;
1659 + if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
1661 + end - beg - 1, &(patterns[i].regs))))
1663 + len = patterns[i].regs.end[0] - start;
1664 + if (exact && !match_words)
1665 + goto success_in_start_and_len;
1666 + if ((!match_lines && !match_words)
1667 + || (match_lines && len == end - beg - 1))
1668 + goto success_in_beg_and_end;
1669 + /* If -w, check if the match aligns with word boundaries.
1670 + We do this iteratively because:
1671 + (a) the line may contain more than one occurrence of the
1673 + (b) Several alternatives in the pattern might be valid at a
1674 + given point, and we may need to consider a shorter one to
1675 + find a word boundary. */
1677 + while (start >= 0)
1679 + if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
1680 + && (len == end - beg - 1
1681 + || !WCHAR ((unsigned char) beg[start + len])))
1682 + goto success_in_beg_and_end;
1685 + /* Try a shorter length anchored at the same place. */
1687 + patterns[i].regexbuf.not_eol = 1;
1688 + len = re_match (&(patterns[i].regexbuf), beg,
1689 + start + len, start,
1690 + &(patterns[i].regs));
1694 + /* Try looking further on. */
1695 + if (start == end - beg - 1)
1698 + patterns[i].regexbuf.not_eol = 0;
1699 + start = re_search (&(patterns[i].regexbuf), beg,
1701 + start, end - beg - 1 - start,
1702 + &(patterns[i].regs));
1703 + len = patterns[i].regs.end[0] - start;
1707 + } /* for Regex patterns. */
1708 + } /* for (beg = end ..) */
1712 + if (MB_CUR_MAX > 1 && mb_properties)
1713 + free (mb_properties);
1714 +#endif /* MBS_SUPPORT */
1715 + return (size_t) -1;
1717 + success_in_beg_and_end:
1719 + start = beg - buf;
1722 + success_in_start_and_len:
1724 + if (MB_CUR_MAX > 1 && mb_properties)
1725 + free (mb_properties);
1726 +#endif /* MBS_SUPPORT */
1727 + *match_size = len;
1732 +Fcompile (char const *pattern, size_t size)
1734 + char const *beg, *lim, *err;
1740 + for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
1742 + if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
1743 + error (2, 0, err);
1744 + if (lim < pattern + size)
1748 + while (beg < pattern + size);
1750 + if ((err = kwsprep (kwset)) != 0)
1751 + error (2, 0, err);
1755 +Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
1757 + register char const *beg, *try, *end;
1758 + register size_t len;
1759 + char eol = eolbyte;
1760 + struct kwsmatch kwsmatch;
1762 + char *mb_properties;
1763 + if (MB_CUR_MAX > 1)
1764 + mb_properties = check_multibyte_string (buf, size);
1765 +#endif /* MBS_SUPPORT */
1767 + for (beg = buf; beg <= buf + size; ++beg)
1769 + size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
1770 + if (offset == (size_t) -1)
1773 + if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
1774 + continue; /* It is a part of multibyte character. */
1775 +#endif /* MBS_SUPPORT */
1777 + len = kwsmatch.size[0];
1778 + if (exact && !match_words)
1779 + goto success_in_beg_and_len;
1782 + if (beg > buf && beg[-1] != eol)
1784 + if (beg + len < buf + size && beg[len] != eol)
1788 + else if (match_words)
1789 + for (try = beg; len; )
1791 + if (try > buf && WCHAR((unsigned char) try[-1]))
1793 + if (try + len < buf + size && WCHAR((unsigned char) try[len]))
1795 + offset = kwsexec (kwset, beg, --len, &kwsmatch);
1796 + if (offset == (size_t) -1)
1799 + if (MB_CUR_MAX > 1)
1800 + free (mb_properties);
1801 +#endif /* MBS_SUPPORT */
1804 + try = beg + offset;
1805 + len = kwsmatch.size[0];
1816 + if (MB_CUR_MAX > 1)
1817 + free (mb_properties);
1818 +#endif /* MBS_SUPPORT */
1822 + end = memchr (beg + len, eol, (buf + size) - (beg + len));
1824 + while (buf < beg && beg[-1] != eol)
1829 + success_in_beg_and_len:
1830 + *match_size = len;
1832 + if (MB_CUR_MAX > 1)
1833 + free (mb_properties);
1834 +#endif /* MBS_SUPPORT */
1839 +/* Compiled internal form of a Perl regular expression. */
1842 +/* Additional information about the pattern. */
1843 +static pcre_extra *extra;
1847 +Pcompile (char const *pattern, size_t size)
1850 + error (2, 0, _("The -P option is not supported"));
1854 + char *re = xmalloc (4 * size + 7);
1855 + int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
1856 + char const *patlim = pattern + size;
1861 + /* FIXME: Remove this restriction. */
1862 + if (eolbyte != '\n')
1863 + error (2, 0, _("The -P and -z options cannot be combined"));
1869 + strcpy (n, "\\b(");
1872 + /* The PCRE interface doesn't allow NUL bytes in the pattern, so
1873 + replace each NUL byte in the pattern with the four characters
1874 + "\000", removing a preceding backslash if there are an odd
1875 + number of backslashes before the NUL.
1877 + FIXME: This method does not work with some multibyte character
1878 + encodings, notably Shift-JIS, where a multibyte character can end
1879 + in a backslash byte. */
1880 + for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
1882 + memcpy (n, p, pnul - p);
1884 + for (p = pnul; pattern < p && p[-1] == '\\'; p--)
1886 + n -= (pnul - p) & 1;
1887 + strcpy (n, "\\000");
1891 + memcpy (n, p, patlim - p);
1895 + strcpy (n, ")\\b");
1899 + cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
1903 + extra = pcre_study (cre, 0, &ep);
1912 +Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
1918 + /* This array must have at least two elements; everything after that
1919 + is just for performance improvement in pcre_exec. */
1922 + int e = pcre_exec (cre, extra, buf, size, 0, 0,
1923 + sub, sizeof sub / sizeof *sub);
1929 + case PCRE_ERROR_NOMATCH:
1932 + case PCRE_ERROR_NOMEMORY:
1933 + error (2, 0, _("Memory exhausted"));
1941 + /* Narrow down to the line we've found. */
1942 + char const *beg = buf + sub[0];
1943 + char const *end = buf + sub[1];
1944 + char const *buflim = buf + size;
1945 + char eol = eolbyte;
1948 + end = memchr (end, eol, buflim - end);
1950 + while (buf < beg && beg[-1] != eol)
1954 + *match_size = end - beg;
1960 +struct matcher const matchers[] = {
1961 + { "default", Gcompile, EGexecute },
1962 + { "grep", Gcompile, EGexecute },
1963 + { "egrep", Ecompile, EGexecute },
1964 + { "awk", Ecompile, EGexecute },
1965 + { "fgrep", Fcompile, Fexecute },
1966 + { "perl", Pcompile, Pexecute },
1969 diff -urN grep-2.5.1a.orig/tests/fmbtest.sh grep-2.5.1a/tests/fmbtest.sh
1970 --- grep-2.5.1a.orig/tests/fmbtest.sh 1970-01-01 05:00:00.000000000 +0500
1971 +++ grep-2.5.1a/tests/fmbtest.sh 2005-10-23 09:51:12.000000000 +0600
1977 +# If cs_CZ.UTF-8 locale doesn't work, skip this test silently
1978 +LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null | ${GREP} -q charmap.*UTF-8 \
1983 +cat > csinput <<EOF
1986 +03 Z číší Čiší cosi
1990 +07 ČČČ ČČČČíšČÍŠčíšEEEE
2005 +cat > cspatfile <<EOF
2010 +for mode in F G E; do
2012 +test1="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode} -f cspatfile csinput \
2013 + | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2014 +if test "$test1" != "11 12 13 14 15 16 17 18"; then
2015 + echo "Test #1 ${mode} failed: $test1"
2019 +test2="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -f cspatfile csinput \
2020 + | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2021 +if test "$test2" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
2022 + echo "Test #2 ${mode} failed: $test2"
2026 +test3="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'ČÍšE' -e 'Čas' csinput \
2027 + | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2028 +if test "$test3" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
2029 + echo "Test #3 ${mode} failed: $test3"
2033 +test4="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}iw -f cspatfile csinput \
2034 + | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2035 +if test "$test4" != "01 02 08 13 17 19"; then
2036 + echo "Test #4 ${mode} failed: $test4"
2042 +# Test that -F --color=always prefers longer matches.
2043 +test5="`echo 'Cosi tu ČišÍ...' \
2044 + | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -Fi -e 'čiš' -e 'čiší'`"
2045 +if echo "$test5" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
2048 + echo "Test #5 F failed: $test5"
2052 +for mode in G E; do
2054 +# Test that -{G,E} --color=always prefers earlier pattern matches.
2055 +test6="`echo 'Cosi tu ČišÍ...' \
2056 + | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiš' -e 'čiší'`"
2057 +if echo "$test6" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČiš.*\[.*m\(.\[K\)\?Í\.\.\.'; then
2060 + echo "Test #6 ${mode} failed: $test6"
2064 +# Test that -{G,E} --color=always prefers earlier pattern matches.
2065 +test7="`echo 'Cosi tu ČišÍ...' \
2066 + | LC_ALL=cs_CZ.UTF-8 ${GREP} --color=always -${mode}i -e 'čiší' -e 'čiš'`"
2067 +if echo "$test7" | LC_ALL=C ${GREP} -q 'Cosi tu .*\[.*mČišÍ.*\[.*m\(.\[K\)\?\.\.\.'; then
2070 + echo "Test #7 ${mode} failed: $test7"
2074 +test8="$(echo `LC_ALL=cs_CZ.UTF-8 ${GREP} -${mode}i -e 'Č.šE' -e 'Č[a-f]s' csinput \
2075 + | LC_ALL=C sed 's/^.*\([0-9][0-9]\).*$/\1/'`)"
2076 +if test "$test8" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
2077 + echo "Test #8 ${mode} failed: $test8"
2084 diff -urN grep-2.5.1a.orig/tests/Makefile.am grep-2.5.1a/tests/Makefile.am
2085 --- grep-2.5.1a.orig/tests/Makefile.am 2001-03-07 09:11:27.000000000 +0500
2086 +++ grep-2.5.1a/tests/Makefile.am 2005-10-23 09:51:12.000000000 +0600
2090 TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
2091 - status.sh empty.sh options.sh backref.sh file.sh
2092 + status.sh empty.sh options.sh backref.sh file.sh \
2094 EXTRA_DIST = $(TESTS) \
2095 khadafy.lines khadafy.regexp \
2096 spencer1.awk spencer1.tests \
2097 diff -urN grep-2.5.1a.orig/tests/Makefile.in grep-2.5.1a/tests/Makefile.in
2098 --- grep-2.5.1a.orig/tests/Makefile.in 2002-03-26 21:09:36.000000000 +0500
2099 +++ grep-2.5.1a/tests/Makefile.in 2005-10-23 09:51:13.000000000 +0600
2103 TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
2104 - status.sh empty.sh options.sh backref.sh file.sh
2105 + status.sh empty.sh options.sh backref.sh file.sh \
2108 EXTRA_DIST = $(TESTS) \
2109 khadafy.lines khadafy.regexp \