From: Lennart Poettering Date: Wed, 12 Dec 2018 12:41:25 +0000 (+0100) Subject: fileio: make read_line() handle various line endings correctly X-Git-Tag: v240~63 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=838894b0c67d89f100aafd500f4d754ebd7cdb29;p=thirdparty%2Fsystemd.git fileio: make read_line() handle various line endings correctly This adds support for Windows line endings. More importantly, though, with this change a newline followed by EOF is considered a single line end. --- diff --git a/src/basic/fileio.c b/src/basic/fileio.c index 11972a62138..83f1f508dd4 100644 --- a/src/basic/fileio.c +++ b/src/basic/fileio.c @@ -694,18 +694,48 @@ int read_nul_string(FILE *f, char **ret) { return 0; } +/* A bitmask of the EOL markers we know */ +typedef enum EndOfLineMarker { + EOL_NONE = 0, + EOL_ZERO = 1 << 0, /* \0 (aka NUL) */ + EOL_TEN = 1 << 1, /* \n (aka NL, aka LF) */ + EOL_THIRTEEN = 1 << 2, /* \r (aka CR) */ +} EndOfLineMarker; + +static EndOfLineMarker categorize_eol(char c) { + if (c == '\n') + return EOL_TEN; + if (c == '\r') + return EOL_THIRTEEN; + if (c == '\0') + return EOL_ZERO; + + return EOL_NONE; +} + DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, funlockfile); int read_line(FILE *f, size_t limit, char **ret) { - _cleanup_free_ char *buffer = NULL; size_t n = 0, allocated = 0, count = 0; + _cleanup_free_ char *buffer = NULL; assert(f); /* Something like a bounded version of getline(). * - * Considers EOF, \n and \0 end of line delimiters, and does not include these delimiters in the string - * returned. + * Considers EOF, \n, \r and \0 end of line delimiters (or combinations of these), and does not include these + * delimiters in the string returned. Specifically, recognizes the following combinations of markers as line + * endings: + * + * • \n (UNIX) + * • \r (old MacOS) + * • \0 (C strings) + * • \n\0 + * • \r\0 + * • \r\n (Windows) + * • \n\r + * • \r\n\0 + * • \n\r\0 * * Returns the number of bytes read from the files (i.e. 
including delimiters — this hence usually differs from * the number of characters in the returned string). When EOF is hit, 0 is returned. @@ -722,9 +752,11 @@ int read_line(FILE *f, size_t limit, char **ret) { { _unused_ _cleanup_(funlockfilep) FILE *flocked = f; + EndOfLineMarker previous_eol = EOL_NONE; flockfile(f); for (;;) { + EndOfLineMarker eol; int c; if (n >= limit) @@ -737,13 +769,29 @@ int read_line(FILE *f, size_t limit, char **ret) { if (ferror_unlocked(f) && n == 0) return errno > 0 ? -errno : -EIO; + /* EOF is line ending too. */ break; } count++; - if (IN_SET(c, '\n', 0)) /* Reached a delimiter */ + eol = categorize_eol(c); + + if (FLAGS_SET(previous_eol, EOL_ZERO) || + (eol == EOL_NONE && previous_eol != EOL_NONE) || + (eol != EOL_NONE && (previous_eol & eol) != 0)) { + /* Previous char was a NUL? This is not an EOL, but the previous char was? This type of + * EOL marker has been seen right before? In either of these three cases we are + * done. But first, let's put this character back in the queue. 
*/ + assert_se(ungetc(c, f) != EOF); + count--; break; + } + + if (eol != EOL_NONE) { + previous_eol |= eol; + continue; + } if (ret) { if (!GREEDY_REALLOC(buffer, allocated, n + 2)) diff --git a/src/test/test-fileio.c b/src/test/test-fileio.c index 44047f2e5a2..53c791814ef 100644 --- a/src/test/test-fileio.c +++ b/src/test/test-fileio.c @@ -612,6 +612,13 @@ static void test_tempfn(void) { static const char buffer[] = "Some test data\n" + "Some weird line\r" + "terminators\r\n" + "and even more\n\r" + "now the same with a NUL\n\0" + "and more\r\0" + "and even more\r\n\0" + "and yet even more\n\r\0" "With newlines, and a NUL byte\0" "\n" "an empty line\n" @@ -624,6 +631,27 @@ static void test_read_line_one_file(FILE *f) { assert_se(read_line(f, (size_t) -1, &line) == 15 && streq(line, "Some test data")); line = mfree(line); + assert_se(read_line(f, (size_t) -1, &line) == 16 && streq(line, "Some weird line")); + line = mfree(line); + + assert_se(read_line(f, (size_t) -1, &line) == 13 && streq(line, "terminators")); + line = mfree(line); + + assert_se(read_line(f, (size_t) -1, &line) == 15 && streq(line, "and even more")); + line = mfree(line); + + assert_se(read_line(f, (size_t) -1, &line) == 25 && streq(line, "now the same with a NUL")); + line = mfree(line); + + assert_se(read_line(f, (size_t) -1, &line) == 10 && streq(line, "and more")); + line = mfree(line); + + assert_se(read_line(f, (size_t) -1, &line) == 16 && streq(line, "and even more")); + line = mfree(line); + + assert_se(read_line(f, (size_t) -1, &line) == 20 && streq(line, "and yet even more")); + line = mfree(line); + assert_se(read_line(f, 1024, &line) == 30 && streq(line, "With newlines, and a NUL byte")); line = mfree(line); @@ -640,10 +668,7 @@ static void test_read_line_one_file(FILE *f) { /* read_line() stopped when it hit the limit, that means when we continue reading we'll read at the first * character after the previous limit. Let's make use of tha to continue our test. 
*/ - assert_se(read_line(f, 1024, &line) == 61 && streq(line, "line that is supposed to be truncated, because it is so long")); - line = mfree(line); - - assert_se(read_line(f, 1024, &line) == 1 && streq(line, "")); + assert_se(read_line(f, 1024, &line) == 62 && streq(line, "line that is supposed to be truncated, because it is so long")); line = mfree(line); assert_se(read_line(f, 1024, &line) == 0 && streq(line, ""));