/* Byte-wise substring search, using the Two-Way algorithm.
- Copyright (C) 2008-2012 Free Software Foundation, Inc.
+ Copyright (C) 2008-2019 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Eric Blake <ebb9@byu.net>, 2008.
/* Before including this file, you need to include <string.h> (and
<config.h> before that, if not part of libc), and define:
- RESULT_TYPE A macro that expands to the return type.
+ RETURN_TYPE A macro that expands to the return type.
AVAILABLE(h, h_l, j, n_l)
A macro that returns nonzero if there are
at least N_L bytes left starting at H[J].
The argument is an 'unsigned char'; the result
must be an 'unsigned char' as well.
- This file undefines the macros documented above, and defines
+ Other macros you may optionally define:
+ RET0_IF_0(a) Documented below at default definition.
+ CHECK_EOL Same.
+
+ This file undefines the macros listed above, and defines
LONG_NEEDLE_THRESHOLD.
*/
# define CMP_FUNC memcmp
#endif
-#ifndef AVAILABLE1
-# define AVAILABLE1(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
-#endif
-#ifndef AVAILABLE2
-# define AVAILABLE2(h, h_l, j, n_l) (1)
+/* Check for end-of-line in strstr and strcasestr routines.
+ We piggy-back matching procedure for detecting EOL where possible,
+ and use AVAILABLE macro otherwise. */
+#ifndef CHECK_EOL
+# define CHECK_EOL (0)
#endif
+
+/* Return NULL if argument is '\0'. */
#ifndef RET0_IF_0
# define RET0_IF_0(a) /* nothing */
#endif
j = 0;
while (AVAILABLE (haystack, haystack_len, j, needle_len))
{
+ const unsigned char *pneedle;
+ const unsigned char *phaystack;
+
/* Scan for matches in right half. */
i = MAX (suffix, memory);
- while (i < needle_len && (CANON_ELEMENT (needle[i])
- == CANON_ELEMENT (haystack[i + j])))
+ pneedle = &needle[i];
+ phaystack = &haystack[i + j];
+ while (i < needle_len && (CANON_ELEMENT (*pneedle++)
+ == CANON_ELEMENT (*phaystack++)))
++i;
if (needle_len <= i)
{
/* Scan for matches in left half. */
i = suffix - 1;
- while (memory < i + 1 && (CANON_ELEMENT (needle[i])
- == CANON_ELEMENT (haystack[i + j])))
+ pneedle = &needle[i];
+ phaystack = &haystack[i + j];
+ while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
+ == CANON_ELEMENT (*phaystack--)))
--i;
if (i + 1 < memory + 1)
return (RETURN_TYPE) (haystack + j);
}
else
{
+ const unsigned char *phaystack;
/* The comparison always starts from needle[suffix], so cache it
and use an optimized first-character loop. */
unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
required, and any mismatch results in a maximal shift. */
period = MAX (suffix, needle_len - suffix) + 1;
j = 0;
- while (AVAILABLE1 (haystack, haystack_len, j, needle_len))
+ while (AVAILABLE (haystack, haystack_len, j, needle_len))
{
unsigned char haystack_char;
+ const unsigned char *pneedle;
+
+ phaystack = &haystack[suffix + j];
- /* TODO: The first-character loop can be sped up by adapting
- longword-at-a-time implementation of memchr/strchr. */
- if (needle_suffix
- != (haystack_char = CANON_ELEMENT (haystack[suffix + j])))
+#ifdef FASTSEARCH
+ if (*phaystack++ != needle_suffix)
+ {
+ phaystack = FASTSEARCH (phaystack, needle_suffix,
+ haystack_len - needle_len - j);
+ if (phaystack == NULL)
+ goto ret0;
+ j = phaystack - &haystack[suffix];
+ phaystack++;
+ }
+#else
+ while (needle_suffix
+ != (haystack_char = CANON_ELEMENT (*phaystack++)))
{
RET0_IF_0 (haystack_char);
+# if !CHECK_EOL
++j;
- continue;
+ if (!AVAILABLE (haystack, haystack_len, j, needle_len))
+ goto ret0;
+# endif
}
+# if CHECK_EOL
+ /* Calculate J if it wasn't kept up-to-date in the first-character
+ loop. */
+ j = phaystack - &haystack[suffix] - 1;
+# endif
+#endif
/* Scan for matches in right half. */
i = suffix + 1;
+ pneedle = &needle[i];
while (i < needle_len)
{
- if (CANON_ELEMENT (needle[i])
- != (haystack_char = CANON_ELEMENT (haystack[i + j])))
+ if (CANON_ELEMENT (*pneedle++)
+ != (haystack_char = CANON_ELEMENT (*phaystack++)))
{
RET0_IF_0 (haystack_char);
break;
}
++i;
}
+#if CHECK_EOL
+ /* Update minimal length of haystack. */
+ if (phaystack > haystack + haystack_len)
+ haystack_len = phaystack - haystack;
+#endif
if (needle_len <= i)
{
/* Scan for matches in left half. */
i = suffix - 1;
+ pneedle = &needle[i];
+ phaystack = &haystack[i + j];
while (i != SIZE_MAX)
{
- if (CANON_ELEMENT (needle[i])
- != (haystack_char = CANON_ELEMENT (haystack[i + j])))
+ if (CANON_ELEMENT (*pneedle--)
+ != (haystack_char = CANON_ELEMENT (*phaystack--)))
{
RET0_IF_0 (haystack_char);
break;
}
else
j += i - suffix + 1;
-
- if (!AVAILABLE2 (haystack, haystack_len, j, needle_len))
- break;
}
}
ret0: __attribute__ ((unused))
j = 0;
while (AVAILABLE (haystack, haystack_len, j, needle_len))
{
+ const unsigned char *pneedle;
+ const unsigned char *phaystack;
+
/* Check the last byte first; if it does not match, then
shift to the next possible match location. */
shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
/* Scan for matches in right half. The last byte has
already been matched, by virtue of the shift table. */
i = MAX (suffix, memory);
- while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
- == CANON_ELEMENT (haystack[i + j])))
+ pneedle = &needle[i];
+ phaystack = &haystack[i + j];
+ while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
+ == CANON_ELEMENT (*phaystack++)))
++i;
if (needle_len - 1 <= i)
{
/* Scan for matches in left half. */
i = suffix - 1;
- while (memory < i + 1 && (CANON_ELEMENT (needle[i])
- == CANON_ELEMENT (haystack[i + j])))
+ pneedle = &needle[i];
+ phaystack = &haystack[i + j];
+ while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
+ == CANON_ELEMENT (*phaystack--)))
--i;
if (i + 1 < memory + 1)
return (RETURN_TYPE) (haystack + j);
j = 0;
while (AVAILABLE (haystack, haystack_len, j, needle_len))
{
+ const unsigned char *pneedle;
+ const unsigned char *phaystack;
+
/* Check the last byte first; if it does not match, then
shift to the next possible match location. */
shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
/* Scan for matches in right half. The last byte has
already been matched, by virtue of the shift table. */
i = suffix;
- while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
- == CANON_ELEMENT (haystack[i + j])))
+ pneedle = &needle[i];
+ phaystack = &haystack[i + j];
+ while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
+ == CANON_ELEMENT (*phaystack++)))
++i;
if (needle_len - 1 <= i)
{
/* Scan for matches in left half. */
i = suffix - 1;
- while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
- == CANON_ELEMENT (haystack[i + j])))
+ pneedle = &needle[i];
+ phaystack = &haystack[i + j];
+ while (i != SIZE_MAX && (CANON_ELEMENT (*pneedle--)
+ == CANON_ELEMENT (*phaystack--)))
--i;
if (i == SIZE_MAX)
return (RETURN_TYPE) (haystack + j);
}
#undef AVAILABLE
-#undef AVAILABLE1
-#undef AVAILABLE2
#undef CANON_ELEMENT
#undef CMP_FUNC
#undef RET0_IF_0
#undef RETURN_TYPE
+#undef CHECK_EOL