#endif
}
+/* Return the length of the longest suffix of BUF (LEN bytes) that matches
+ a prefix of delim_bytes, i.e. a delimiter possibly split across buffers. */
+
+ATTRIBUTE_PURE
+static idx_t
+field_delim_overlap (char const *buf, idx_t len)
+{
+ idx_t overlap = MIN (len, delim_length - 1); /* A proper prefix only. */
+
+ while (0 < overlap) /* Try the longest candidate first. */
+ {
+ if (memcmp (buf + len - overlap, delim_bytes, overlap) == 0)
+ return overlap;
+ overlap--;
+ }
+
+ return 0; /* No suffix of BUF starts a delimiter. */
+}
+
/* Byte search for line end or delimiter in BUF,
returning results in CTX. */
idx_t field_len = terminator ? terminator - (chunk + processed)
: n_avail - processed;
+ if (terminator_kind == FIELD_DATA
+ && !search.at_eof
+ && !whitespace_delimited
+ && !field_delim_is_line_delim ())
+ field_len -= field_delim_overlap (chunk + processed, field_len);
+
if (field_len || terminator)
have_pending_line = true;
['mb-delim-8', '-d', "\xff", '-f2', # Note 0xF5-0xFF is efficient
{IN=>"a\xffb\n"}, {OUT=>"b\n"},
{ENV => "LC_ALL=$mb_locale"}],
+ ['mb-delim-9', '-d', "\xc3\xa9", '-f2',
+ {IN=>('a' x ($IO_BUFSIZE - 1)) . "\xc3\xa9b\n"}, {OUT=>"b\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
['mb-w-delim-1', '-w', '-f2', {IN=>"a\xe2\x80\x83b\n"}, {OUT=>"b\n"},
{ENV => "LC_ALL=$mb_locale"}],
['mb-w-delim-2', '-sw', '-f2', {IN=>"a\xc2\xa0b\n"}, {OUT=>""},