From: Zbigniew Jędrzejewski-Szmek Date: Wed, 15 May 2019 09:20:26 +0000 (+0200) Subject: Rework cmdline printing to use unicode X-Git-Tag: v243-rc1~383^2~6 X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fsystemd.git;a=commitdiff_plain;h=bc28751ed2372065fd6f6f7ba6137edf21dee88b Rework cmdline printing to use unicode The functions to retrieve and print process cmdlines were based on the assumption that they contain printable ASCII, and everything else should be filtered out. That assumption doesn't hold in today's world, where people are free to use unicode everywhere. This replaces the custom cmdline reading code with a more generic approach using utf8_escape_non_printable_full(). For kernel threads, truncation is done on the parenthesized name, so we'll get "[worker]", "[worker…]", …, "[w…]", "[…", "…" as we reduce the number of available columns. This implementation is most likely slower for very long cmdlines, but I don't think this is very important. The common case is to have short commandlines, and should print those properly. Absurdly long cmdlines are the exception, which needs to be handled correctly and safely, but speed is not too important. Fixes #12532. v2: - use size_t for the number of columns. This change propagates into various other functions that call get_process_cmdline(), increasing the size of the patch, but the changes are rather trivial. --- diff --git a/src/basic/proc-cmdline.c b/src/basic/proc-cmdline.c index 16700013641..7dca9e60b61 100644 --- a/src/basic/proc-cmdline.c +++ b/src/basic/proc-cmdline.c @@ -34,7 +34,7 @@ int proc_cmdline(char **ret) { } if (detect_container() > 0) - return get_process_cmdline(1, 0, false, ret); + return get_process_cmdline(1, SIZE_MAX, false, ret); else return read_one_line_file("/proc/cmdline", ret); } diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 21af88f5f88..4c05f16db52 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -44,6 +44,7 @@ #include "string-util.h" #include "terminal-util.h" #include "user-util.h" +#include "utf8.h" static int get_process_state(pid_t pid) { const char *p; @@ -100,22 +101,24 @@ int get_process_comm(pid_t pid, char **ret) { return 0; } -int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) { +int get_process_cmdline(pid_t pid, size_t max_columns, bool comm_fallback, char **line) { _cleanup_fclose_ FILE *f = NULL; - bool space = false; - char *k; - _cleanup_free_ char *ans = NULL; + _cleanup_free_ char *t = NULL, *ans = NULL; const char *p; - int c, r; + int r; + size_t k; + + /* This is supposed to be a safety guard against runaway command lines. */ + size_t max_length = sc_arg_max(); assert(line); assert(pid >= 0); - /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing - * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most - * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If - * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a - * command line that resolves to the empty string will return the "comm" name of the process instead. + /* Retrieves a process' command line. Replaces non-utf8 bytes by replacement character (�). If + * max_columns is != -1 will return a string of the specified console width at most, abbreviated with + * an ellipsis. If comm_fallback is true and the process has no command line set (the case for kernel + * threads), or has a command line that resolves to the empty string will return the "comm" name of + * the process instead. This will use at most _SC_ARG_MAX bytes of input data. * * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and * comm_fallback is false). Returns 0 and sets *line otherwise. */ @@ -127,127 +130,54 @@ int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char * if (r < 0) return r; - if (max_length == 0) - /* This is supposed to be a safety guard against runaway command lines. */ - max_length = sc_arg_max(); - - if (max_length == 1) { + /* We assume that each four-byte character uses one or two columns. If we ever check for combining + * characters, this assumption will need to be adjusted. */ + if ((size_t) 4 * max_columns + 1 < max_columns) + max_length = MIN(max_length, (size_t) 4 * max_columns + 1); - /* If there's only room for one byte, return the empty string */ - ans = new0(char, 1); - if (!ans) - return -ENOMEM; + t = new(char, max_length); + if (!t) + return -ENOMEM; - *line = TAKE_PTR(ans); - return 0; + k = fread(t, 1, max_length, f); + if (k > 0) { + /* Arguments are separated by NULs. Let's replace those with spaces. */ + for (size_t i = 0; i < k - 1; i++) + if (t[i] == '\0') + t[i] = ' '; + t[k] = '\0'; /* Normally, t[k] is already NUL, so this is just a guard in case of short read */ } else { - bool dotdotdot = false; - size_t left; - - ans = new(char, max_length); - if (!ans) - return -ENOMEM; - - k = ans; - left = max_length; - while ((c = getc(f)) != EOF) { - - if (isprint(c)) { - - if (space) { - if (left <= 2) { - dotdotdot = true; - break; - } - - *(k++) = ' '; - left--; - space = false; - } - - if (left <= 1) { - dotdotdot = true; - break; - } - - *(k++) = (char) c; - left--; - } else if (k > ans) - space = true; - } - - if (dotdotdot) { - if (max_length <= 4) { - k = ans; - left = max_length; - } else { - k = ans + max_length - 4; - left = 4; - - /* Eat up final spaces */ - while (k > ans && isspace(k[-1])) { - k--; - left++; - } - } - - strncpy(k, "...", left-1); - k[left-1] = 0; - } else - *k = 0; - } - - /* Kernel threads have no argv[] */ - if (isempty(ans)) { - _cleanup_free_ char *t = NULL; - int h; - - ans = mfree(ans); + /* We only treat getting nothing as an error. We *could* also get an error after reading some + * data, but we ignore that case, as such an error is rather unlikely and we prefer to get + * some data rather than none. */ + if (ferror(f)) + return -errno; if (!comm_fallback) return -ENOENT; - h = get_process_comm(pid, &t); - if (h < 0) - return h; - - size_t l = strlen(t); - - if (l + 3 <= max_length) { - ans = strjoin("[", t, "]"); - if (!ans) - return -ENOMEM; - - } else if (max_length <= 6) { - ans = new(char, max_length); - if (!ans) - return -ENOMEM; - - memcpy(ans, "[...]", max_length-1); - ans[max_length-1] = 0; - } else { - t[max_length - 6] = 0; + /* Kernel threads have no argv[] */ + _cleanup_free_ char *t2 = NULL; - /* Chop off final spaces */ - delete_trailing_chars(t, WHITESPACE); - - ans = strjoin("[", t, "...]"); - if (!ans) - return -ENOMEM; - } + r = get_process_comm(pid, &t2); + if (r < 0) + return r; - *line = TAKE_PTR(ans); - return 0; + mfree(t); + t = strjoin("[", t2, "]"); + if (!t) + return -ENOMEM; } - k = realloc(ans, strlen(ans) + 1); - if (!k) - return -ENOMEM; + delete_trailing_chars(t, WHITESPACE); - ans = NULL; - *line = k; + ans = utf8_escape_non_printable_full(t, max_columns); + if (!ans) + return -ENOMEM; + (void) str_realloc(&ans); + *line = TAKE_PTR(ans); return 0; } diff --git a/src/basic/process-util.h b/src/basic/process-util.h index 83ba93d0d74..dec584606b6 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -32,7 +32,7 @@ }) int get_process_comm(pid_t pid, char **name); -int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line); +int get_process_cmdline(pid_t pid, size_t max_columns, bool comm_fallback, char **line); int get_process_exe(pid_t pid, char **name); int get_process_uid(pid_t pid, uid_t *uid); int get_process_gid(pid_t pid, gid_t *gid); diff --git a/src/basic/string-util.h b/src/basic/string-util.h index a630856236a..47b17c9d3e7 100644 --- a/src/basic/string-util.h +++ b/src/basic/string-util.h @@ -257,3 +257,16 @@ static inline void *memory_startswith_no_case(const void *p, size_t sz, const ch return (uint8_t*) p + n; } + +static inline char* str_realloc(char **p) { + /* Reallocate *p to actual size */ + + if (!*p) + return NULL; + + char *t = realloc(*p, strlen(*p) + 1); + if (!t) + return NULL; + + return (*p = t); +} diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 16b4a6b1332..1e996a2f1b0 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -901,7 +901,7 @@ static int append_process(sd_bus_message *reply, const char *p, pid_t pid, Set * p = buf; } - (void) get_process_cmdline(pid, 0, true, &cmdline); + (void) get_process_cmdline(pid, SIZE_MAX, true, &cmdline); return sd_bus_message_append(reply, "(sus)", diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 9ab704e2076..c0bd03762f1 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -661,7 +661,7 @@ static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) { if (r < 0) return r; - r = get_process_cmdline(container_pid, 0, false, cmdline); + r = get_process_cmdline(container_pid, SIZE_MAX, false, cmdline); if (r < 0) return r; @@ -1154,7 +1154,7 @@ static int gather_pid_metadata( if (sd_pid_get_slice(pid, &t) >= 0) set_iovec_field_free(iovec, n_iovec, "COREDUMP_SLICE=", t); - if (get_process_cmdline(pid, 0, false, &t) >= 0) + if (get_process_cmdline(pid, SIZE_MAX, false, &t) >= 0) set_iovec_field_free(iovec, n_iovec, "COREDUMP_CMDLINE=", t); if (cg_pid_get_path_shifted(pid, NULL, &t) >= 0) diff --git a/src/journal/journald-context.c b/src/journal/journald-context.c index e41e4fc5a72..684ae9aaae0 100644 --- a/src/journal/journald-context.c +++ b/src/journal/journald-context.c @@ -230,7 +230,7 @@ static void client_context_read_basic(ClientContext *c) { if (get_process_exe(c->pid, &t) >= 0) free_and_replace(c->exe, t); - if (get_process_cmdline(c->pid, 0, false, &t) >= 0) + if (get_process_cmdline(c->pid, SIZE_MAX, false, &t) >= 0) free_and_replace(c->cmdline, t); if (get_process_capeff(c->pid, &t) >= 0) diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c index cac7922ec0e..2bd0e09caee 100644 --- a/src/shared/cgroup-show.c +++ b/src/shared/cgroup-show.c @@ -29,7 +29,7 @@ static void show_pid_array( pid_t pids[], unsigned n_pids, const char *prefix, - unsigned n_columns, + size_t n_columns, bool extra, bool more, OutputFlags flags) { @@ -51,7 +51,7 @@ static void show_pid_array( pid_width = DECIMAL_STR_WIDTH(pids[j]); if (flags & OUTPUT_FULL_WIDTH) - n_columns = 0; + n_columns = SIZE_MAX; else { if (n_columns > pid_width + 3) /* something like "├─1114784 " */ n_columns -= pid_width + 3; @@ -75,7 +75,7 @@ static void show_pid_array( static int show_cgroup_one_by_path( const char *path, const char *prefix, - unsigned n_columns, + size_t n_columns, bool more, OutputFlags flags) { @@ -119,7 +119,7 @@ static int show_cgroup_one_by_path( int show_cgroup_by_path( const char *path, const char *prefix, - unsigned n_columns, + size_t n_columns, OutputFlags flags) { _cleanup_free_ char *fn = NULL, *p1 = NULL, *last = NULL, *p2 = NULL; @@ -199,7 +199,7 @@ int show_cgroup_by_path( int show_cgroup(const char *controller, const char *path, const char *prefix, - unsigned n_columns, + size_t n_columns, OutputFlags flags) { _cleanup_free_ char *p = NULL; int r; @@ -217,7 +217,7 @@ static int show_extra_pids( const char *controller, const char *path, const char *prefix, - unsigned n_columns, + size_t n_columns, const pid_t pids[], unsigned n_pids, OutputFlags flags) { @@ -262,7 +262,7 @@ int show_cgroup_and_extra( const char *controller, const char *path, const char *prefix, - unsigned n_columns, + size_t n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags) { diff --git a/src/shared/cgroup-show.h b/src/shared/cgroup-show.h index 3593e9dcf43..dfb26f82910 100644 --- a/src/shared/cgroup-show.h +++ b/src/shared/cgroup-show.h @@ -9,10 +9,10 @@ #include "logs-show.h" #include "output-mode.h" -int show_cgroup_by_path(const char *path, const char *prefix, unsigned columns, OutputFlags flags); -int show_cgroup(const char *controller, const char *path, const char *prefix, unsigned columns, OutputFlags flags); +int show_cgroup_by_path(const char *path, const char *prefix, size_t n_columns, OutputFlags flags); +int show_cgroup(const char *controller, const char *path, const char *prefix, size_t n_columns, OutputFlags flags); -int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, unsigned n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags); +int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, size_t n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags); int show_cgroup_get_unit_path_and_warn( sd_bus *bus, diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c index 89f6618e2e8..aaa8041ec35 100644 --- a/src/test/test-process-util.c +++ b/src/test/test-process-util.c @@ -237,117 +237,116 @@ static void test_get_process_cmdline_harder(void) { assert_se(prctl(PR_SET_NAME, "testa") >= 0); - assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT); + assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, false, &line) == -ENOENT); - assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0); + assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, true, &line) >= 0); assert_se(streq(line, "[testa]")); line = mfree(line); - assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0); + assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0); + log_info("'%s'", line); assert_se(streq(line, "")); line = mfree(line); + assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0); + assert_se(streq(line, "…")); + line = mfree(line); + assert_se(get_process_cmdline(getpid_cached(), 2, true, &line) >= 0); - assert_se(streq(line, "[")); + assert_se(streq(line, "[…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 3, true, &line) >= 0); - assert_se(streq(line, "[.")); + assert_se(streq(line, "[t…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 4, true, &line) >= 0); - assert_se(streq(line, "[..")); + assert_se(streq(line, "[te…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 5, true, &line) >= 0); - assert_se(streq(line, "[...")); + assert_se(streq(line, "[tes…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 6, true, &line) >= 0); - assert_se(streq(line, "[...]")); + assert_se(streq(line, "[test…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 7, true, &line) >= 0); - assert_se(streq(line, "[t...]")); - line = mfree(line); - - assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0); assert_se(streq(line, "[testa]")); line = mfree(line); - assert_se(write(fd, "\0\0\0\0\0\0\0\0\0", 10) == 10); - - assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT); - - assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0); + assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0); assert_se(streq(line, "[testa]")); line = mfree(line); - assert_se(write(fd, "foo\0bar\0\0\0\0\0", 10) == 10); + assert_se(write(fd, "foo\0bar", 8) == 8); - assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) >= 0); + assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, false, &line) >= 0); + log_info("'%s'", line); assert_se(streq(line, "foo bar")); line = mfree(line); - assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0); + assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, true, &line) >= 0); assert_se(streq(line, "foo bar")); line = mfree(line); assert_se(write(fd, "quux", 4) == 4); - assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) >= 0); + assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, false, &line) >= 0); + log_info("'%s'", line); assert_se(streq(line, "foo bar quux")); line = mfree(line); - assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0); + assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, true, &line) >= 0); assert_se(streq(line, "foo bar quux")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0); - assert_se(streq(line, "")); + assert_se(streq(line, "…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 2, true, &line) >= 0); - assert_se(streq(line, ".")); + assert_se(streq(line, "f…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 3, true, &line) >= 0); - assert_se(streq(line, "..")); + assert_se(streq(line, "fo…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 4, true, &line) >= 0); - assert_se(streq(line, "...")); + assert_se(streq(line, "foo…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 5, true, &line) >= 0); - assert_se(streq(line, "f...")); + assert_se(streq(line, "foo …")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 6, true, &line) >= 0); - assert_se(streq(line, "fo...")); + assert_se(streq(line, "foo b…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 7, true, &line) >= 0); - assert_se(streq(line, "foo...")); + assert_se(streq(line, "foo ba…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0); - assert_se(streq(line, "foo...")); + assert_se(streq(line, "foo bar…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 9, true, &line) >= 0); - assert_se(streq(line, "foo b...")); + assert_se(streq(line, "foo bar …")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 10, true, &line) >= 0); - assert_se(streq(line, "foo ba...")); + assert_se(streq(line, "foo bar q…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 11, true, &line) >= 0); - assert_se(streq(line, "foo bar...")); + assert_se(streq(line, "foo bar qu…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 12, true, &line) >= 0); - assert_se(streq(line, "foo bar...")); + assert_se(streq(line, "foo bar quux")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 13, true, &line) >= 0); @@ -365,22 +364,22 @@ static void test_get_process_cmdline_harder(void) { assert_se(ftruncate(fd, 0) >= 0); assert_se(prctl(PR_SET_NAME, "aaaa bbbb cccc") >= 0); - assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT); + assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, false, &line) == -ENOENT); - assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0); + assert_se(get_process_cmdline(getpid_cached(), SIZE_MAX, true, &line) >= 0); assert_se(streq(line, "[aaaa bbbb cccc]")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 10, true, &line) >= 0); - assert_se(streq(line, "[aaaa...]")); + assert_se(streq(line, "[aaaa bbb…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 11, true, &line) >= 0); - assert_se(streq(line, "[aaaa...]")); + assert_se(streq(line, "[aaaa bbbb…")); line = mfree(line); assert_se(get_process_cmdline(getpid_cached(), 12, true, &line) >= 0); - assert_se(streq(line, "[aaaa b...]")); + assert_se(streq(line, "[aaaa bbbb …")); line = mfree(line); safe_close(fd); @@ -409,7 +408,7 @@ static void test_rename_process_now(const char *p, int ret) { log_info("comm = <%s>", comm); assert_se(strneq(comm, p, TASK_COMM_LEN-1)); - r = get_process_cmdline(0, 0, false, &cmdline); + r = get_process_cmdline(0, SIZE_MAX, false, &cmdline); assert_se(r >= 0); /* we cannot expect cmdline to be renamed properly without privileges */ if (geteuid() == 0) {