[PATCH v3 0/2] Fix /proc/*/stat parsing

While working on some polkit stuff I found out that we are inconsistent with the way we parse /proc/*/stat files, so I added a new helper instead along with some tests. Unfortunately using it for the thing I wanted is not really viable in the end, so it "violates" the Rule of three, but at least it does something correctly. v3: - Added an enum with names for the fields of the file - Made this not limited to linux as that was the way it worked in qemu before v2: - https://listman.redhat.com/archives/libvir-list/2021-November/msg00606.html - Fixed open64 by just using virFileReadAllQuiet instead of g_file_get_contents - Removed some leftover unused variables - Still do not know why my cirrus builds fail v1: - https://listman.redhat.com/archives/libvir-list/2021-November/msg00580.html Martin Kletzander (2): util: Add virProcessGetStat Use virProcessGetStat src/libvirt_private.syms | 1 + src/qemu/qemu_driver.c | 34 ++----- src/util/virprocess.c | 125 +++++++++++++++++--------- src/util/virprocess.h | 66 ++++++++++++++ tests/meson.build | 1 + tests/virprocessstatdata/complex/stat | 2 + tests/virprocessstatdata/simple/stat | 1 + tests/virprocessstattest.c | 88 ++++++++++++++++++ 8 files changed, 249 insertions(+), 69 deletions(-) create mode 100644 tests/virprocessstatdata/complex/stat create mode 100644 tests/virprocessstatdata/simple/stat create mode 100644 tests/virprocessstattest.c -- 2.34.0

This reads and separates all fields from /proc/<pid>/stat or /proc/<pid>/task/<tid>/stat as there are easy mistakes to be done in the implementation. Some tests are added to show it works correctly. No number parsing is done as it would be unused for most of the fields most, if not all, of the time. No struct is used for the result as the length can vary (new fields can be added in the future). Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 77 +++++++++++++++++++++++ src/util/virprocess.h | 66 ++++++++++++++++++++ tests/meson.build | 1 + tests/virprocessstatdata/complex/stat | 2 + tests/virprocessstatdata/simple/stat | 1 + tests/virprocessstattest.c | 88 +++++++++++++++++++++++++++ 7 files changed, 236 insertions(+) create mode 100644 tests/virprocessstatdata/complex/stat create mode 100644 tests/virprocessstatdata/simple/stat create mode 100644 tests/virprocessstattest.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a7bc50a4d16d..7a4e7d7d6f40 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3099,6 +3099,7 @@ virProcessGetMaxMemLock; virProcessGetNamespaces; virProcessGetPids; virProcessGetStartTime; +virProcessGetStat; virProcessGroupGet; virProcessGroupKill; virProcessKill; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 6de3f36f529c..aab51962acfc 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1721,3 +1721,80 @@ virProcessSetScheduler(pid_t pid G_GNUC_UNUSED, } #endif /* !WITH_SCHED_SETSCHEDULER */ + +/* + * Get all stat fields for a process based on pid and tid: + * - pid == 0 && tid == 0 => /proc/self/stat + * - pid != 0 && tid == 0 => /proc/<pid>/stat + * - pid == 0 && tid != 0 => /proc/self/task/<tid>/stat + * - pid != 0 && tid != 0 => /proc/<pid>/task/<tid>/stat + * and return them as array of strings. 
+ */ +GStrv +virProcessGetStat(pid_t pid, + pid_t tid) +{ + int len = 10 * 1024; /* 10kB ought to be enough for everyone */ + g_autofree char *buf = NULL; + g_autofree char *path = NULL; + GStrv rest = NULL; + GStrv ret = NULL; + char *comm = NULL; + char *rparen = NULL; + size_t nrest = 0; + + if (pid) { + if (tid) + path = g_strdup_printf("/proc/%d/task/%d/stat", (int)pid, (int)tid); + else + path = g_strdup_printf("/proc/%d/stat", (int)pid); + } else { + if (tid) + path = g_strdup_printf("/proc/self/task/%d/stat", (int)tid); + else + path = g_strdup("/proc/self/stat"); + } + + len = virFileReadAllQuiet(path, len, &buf); + if (len < 0) + return NULL; + + /* eliminate trailing spaces */ + while (len > 0 && g_ascii_isspace(buf[--len])) + buf[len] = '\0'; + + /* Find end of the first field */ + if (!(comm = strchr(buf, ' '))) + return NULL; + *comm = '\0'; + + /* Check start of the second field (filename of the executable, in + * parentheses) */ + comm++; + if (*comm != '(') + return NULL; + comm++; + + /* Check end of the second field (last closing parenthesis) */ + rparen = strrchr(comm, ')'); + if (!rparen) + return NULL; + *rparen = '\0'; + + /* We need to check that the next char is not '\0', but why not just opt in + * for the safer way of checking whether it is ' ' (space) instead */ + if (rparen[1] != ' ') + return NULL; + + rest = g_strsplit(rparen + 2, " ", 0); + nrest = g_strv_length(rest); + ret = g_new0(char *, nrest + 3); + ret[0] = g_strdup(buf); + ret[1] = g_strdup(comm); + memcpy(ret + 2, rest, nrest * sizeof(char *)); + + /* Do not use g_strfreev() as individual elements they were moved to @ret. 
*/ + VIR_FREE(rest); + + return ret; +} diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 9910331a0caa..82b740396499 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -117,6 +117,72 @@ int virProcessSetupPrivateMountNS(void); int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, int priority); + +GStrv virProcessGetStat(pid_t pid, pid_t tid); + +/* These constants are modelled after proc(5) */ +enum { + VIR_PROCESS_STAT_PID, + VIR_PROCESS_STAT_COMM, + VIR_PROCESS_STAT_STATE, + VIR_PROCESS_STAT_PPID, + VIR_PROCESS_STAT_PGRP, + VIR_PROCESS_STAT_SESSION, + VIR_PROCESS_STAT_TTY_NR, + VIR_PROCESS_STAT_TPGID, + VIR_PROCESS_STAT_FLAGS, + VIR_PROCESS_STAT_MINFLT, + VIR_PROCESS_STAT_CMINFLT, + VIR_PROCESS_STAT_MAJFLT, + VIR_PROCESS_STAT_CMAJFLT, + VIR_PROCESS_STAT_UTIME, + VIR_PROCESS_STAT_STIME, + VIR_PROCESS_STAT_CUTIME, + VIR_PROCESS_STAT_CSTIME, + VIR_PROCESS_STAT_PRIORITY, + VIR_PROCESS_STAT_NICE, + VIR_PROCESS_STAT_NUM_THREADS, + VIR_PROCESS_STAT_ITREALVALUE, + VIR_PROCESS_STAT_STARTTIME, + VIR_PROCESS_STAT_VSIZE, + VIR_PROCESS_STAT_RSS, + VIR_PROCESS_STAT_RSSLIM, + VIR_PROCESS_STAT_STARTCODE, + VIR_PROCESS_STAT_ENDCODE, + VIR_PROCESS_STAT_STARTSTACK, + VIR_PROCESS_STAT_KSTKESP, + VIR_PROCESS_STAT_KSTKEIP, + VIR_PROCESS_STAT_SIGNAL, + VIR_PROCESS_STAT_BLOCKED, + VIR_PROCESS_STAT_SIGIGNORE, + VIR_PROCESS_STAT_SIGCATCH, + VIR_PROCESS_STAT_WCHAN, + VIR_PROCESS_STAT_NSWAP, + VIR_PROCESS_STAT_CNSWAP, + VIR_PROCESS_STAT_EXIT_SIGNAL, + VIR_PROCESS_STAT_PROCESSOR, + VIR_PROCESS_STAT_RT_PRIORITY, + VIR_PROCESS_STAT_POLICY, + VIR_PROCESS_STAT_DELAYACCT_BLKIO_TICKS, + VIR_PROCESS_STAT_GUEST_TIME, + VIR_PROCESS_STAT_CGUEST_TIME, + VIR_PROCESS_STAT_START_DATA, + VIR_PROCESS_STAT_END_DATA, + VIR_PROCESS_STAT_START_BRK, + VIR_PROCESS_STAT_ARG_START, + VIR_PROCESS_STAT_ARG_END, + VIR_PROCESS_STAT_ENV_START, + VIR_PROCESS_STAT_ENV_END, + VIR_PROCESS_STAT_EXIT_CODE, +}; + +/* + * At the time of writing there are 52 values reported in 
/proc/.../stat, the + * line below checks that the last one has the right value, increase accordingly + * based on proc(5) whenever adding new fields. +*/ +G_STATIC_ASSERT(VIR_PROCESS_STAT_EXIT_CODE == 51); + typedef enum { VIR_PROCESS_NAMESPACE_MNT = (1 << 1), VIR_PROCESS_NAMESPACE_IPC = (1 << 2), diff --git a/tests/meson.build b/tests/meson.build index 1948c07ae385..f75c24872086 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -347,6 +347,7 @@ if host_machine.system() == 'linux' { 'name': 'scsihosttest' }, { 'name': 'vircaps2xmltest', 'link_whole': [ test_file_wrapper_lib ] }, { 'name': 'virnetdevbandwidthtest' }, + { 'name': 'virprocessstattest', 'link_whole': [ test_file_wrapper_lib ] }, { 'name': 'virresctrltest', 'link_whole': [ test_file_wrapper_lib ] }, { 'name': 'virscsitest' }, { 'name': 'virusbtest' }, diff --git a/tests/virprocessstatdata/complex/stat b/tests/virprocessstatdata/complex/stat new file mode 100644 index 000000000000..df0ecc149c6f --- /dev/null +++ b/tests/virprocessstatdata/complex/stat @@ -0,0 +1,2 @@ +1 (this) is ( a weird ) +)( (command ( ) 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 diff --git a/tests/virprocessstatdata/simple/stat b/tests/virprocessstatdata/simple/stat new file mode 100644 index 000000000000..97d68735370d --- /dev/null +++ b/tests/virprocessstatdata/simple/stat @@ -0,0 +1 @@ +1 (command) 3 4 5 diff --git a/tests/virprocessstattest.c b/tests/virprocessstattest.c new file mode 100644 index 000000000000..8fb82c48ef33 --- /dev/null +++ b/tests/virprocessstattest.c @@ -0,0 +1,88 @@ +#include <config.h> + +#include "testutils.h" +#include "virfilewrapper.h" +#include "virprocess.h" + + +struct testData { + const char *filename; + const char *command; + size_t count; + bool 
self; +}; + + +static int +test_virProcessGetStat(const void *opaque) +{ + struct testData *data = (struct testData *) opaque; + g_autofree char *data_dir = NULL; + g_auto(GStrv) proc_stat = NULL; + size_t len = 0; + id_t id = data->self ? 0 : -1; + const char *command = NULL; + + data_dir = g_strdup_printf("%s/virprocessstatdata/%s/", + abs_srcdir, data->filename); + + /* We are using predictable id of -1 because this case we will clearly see + * that the test failed in case of virFileWrapper failure */ + if (id) + virFileWrapperAddPrefix("/proc/-1/task/-1/", data_dir); + else + virFileWrapperAddPrefix("/proc/self/", data_dir); + + proc_stat = virProcessGetStat(id, id); + + virFileWrapperClearPrefixes(); + + if (!proc_stat) { + fprintf(stderr, "Could not get process stats\n"); + return -1; + } + + len = g_strv_length(proc_stat); + if (data->count != len) { + fprintf(stderr, "Count incorrect, expected %zu, got %zu\n", + data->count, len); + return -1; + } + + command = proc_stat[VIR_PROCESS_STAT_COMM]; + if (!STREQ_NULLABLE(data->command, command)) { + fprintf(stderr, "Command incorrect, expected %s, got %s\n", + data->command, command); + return -1; + } + + return 0; +} + + +static int +mymain(void) +{ + struct testData data = {0}; + int ret = 0; + +#define DO_TEST(_filename, _command, _count, _self) \ + do { \ + data = (struct testData){ \ + .filename = _filename, \ + .command = _command, \ + .count = _count, \ + .self = _self, \ + }; \ + if (virTestRun("Reading process stat: " _filename, \ + test_virProcessGetStat, &data) < 0) \ + ret = -1; \ + } while (0) + + DO_TEST("simple", "command", 5, true); + DO_TEST("complex", "this) is ( a \t weird )\n)( (command ( ", 100, false); + + return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} + +VIR_TEST_MAIN(mymain) -- 2.34.0

On a Tuesday in 2021, Martin Kletzander wrote:
This reads and separates all fields from /proc/<pid>/stat or /proc/<pid>/task/<tid>/stat, as it is easy to make mistakes when implementing this parsing. Some tests are added to show it works correctly. No number parsing is done, as it would be unused for most of the fields most, if not all, of the time. No struct is used for the result, as the number of fields can vary (new fields can be added in the future).
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 77 +++++++++++++++++++++++ src/util/virprocess.h | 66 ++++++++++++++++++++ tests/meson.build | 1 + tests/virprocessstatdata/complex/stat | 2 + tests/virprocessstatdata/simple/stat | 1 + tests/virprocessstattest.c | 88 +++++++++++++++++++++++++++ 7 files changed, 236 insertions(+) create mode 100644 tests/virprocessstatdata/complex/stat create mode 100644 tests/virprocessstatdata/simple/stat create mode 100644 tests/virprocessstattest.c
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano

This eliminates one incorrect parsing implementation which relied on the command field not having a closing bracket. This possibility is already tested against in the virProcessGetStat() tests. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_driver.c | 34 ++++++------------------------ src/util/virprocess.c | 48 ++++++------------------------------------ 2 files changed, 13 insertions(+), 69 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index d954635dde2a..16d449913ca7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1399,36 +1399,18 @@ qemuGetSchedInfo(unsigned long long *cpuWait, static int qemuGetProcessInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss, - pid_t pid, int tid) + pid_t pid, pid_t tid) { - g_autofree char *proc = NULL; - FILE *pidinfo; + g_auto(GStrv) proc_stat = virProcessGetStat(pid, tid); unsigned long long usertime = 0, systime = 0; long rss = 0; int cpu = 0; - /* In general, we cannot assume pid_t fits in int; but /proc parsing - * is specific to Linux where int works fine. */ - if (tid) - proc = g_strdup_printf("/proc/%d/task/%d/stat", (int)pid, tid); - else - proc = g_strdup_printf("/proc/%d/stat", (int)pid); - if (!proc) - return -1; - - pidinfo = fopen(proc, "r"); - - /* See 'man proc' for information about what all these fields are. 
We're - * only interested in a very few of them */ - if (!pidinfo || - fscanf(pidinfo, - /* pid -> stime */ - "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %llu %llu" - /* cutime -> endcode */ - "%*d %*d %*d %*d %*d %*d %*u %*u %ld %*u %*u %*u" - /* startstack -> processor */ - "%*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*d %d", - &usertime, &systime, &rss, &cpu) != 4) { + if (!proc_stat || + virStrToLong_ullp(proc_stat[VIR_PROCESS_STAT_UTIME], NULL, 10, &usertime) < 0 || + virStrToLong_ullp(proc_stat[VIR_PROCESS_STAT_STIME], NULL, 10, &systime) < 0 || + virStrToLong_l(proc_stat[VIR_PROCESS_STAT_RSS], NULL, 10, &rss) < 0 || + virStrToLong_i(proc_stat[VIR_PROCESS_STAT_PROCESSOR], NULL, 10, &cpu) < 0) { VIR_WARN("cannot parse process status data"); } @@ -1450,8 +1432,6 @@ qemuGetProcessInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss, VIR_DEBUG("Got status for %d/%d user=%llu sys=%llu cpu=%d rss=%ld", (int)pid, tid, usertime, systime, cpu, rss); - VIR_FORCE_FCLOSE(pidinfo); - return 0; } diff --git a/src/util/virprocess.c b/src/util/virprocess.c index aab51962acfc..20d04bad6f26 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1153,56 +1153,20 @@ virProcessSetMaxCoreSize(pid_t pid G_GNUC_UNUSED, int virProcessGetStartTime(pid_t pid, unsigned long long *timestamp) { - char *tmp; - int len; - g_autofree char *filename = NULL; - g_autofree char *buf = NULL; - g_auto(GStrv) tokens = NULL; - - filename = g_strdup_printf("/proc/%llu/stat", (long long)pid); - - if ((len = virFileReadAll(filename, 1024, &buf)) < 0) - return -1; + g_auto(GStrv) proc_stat = virProcessGetStat(pid, 0); - /* start time is the token at index 19 after the '(process name)' entry - since only this - * field can contain the ')' character, search backwards for this to avoid malicious - * processes trying to fool us - */ - - if (!(tmp = strrchr(buf, ')'))) { + if (!proc_stat || g_strv_length(proc_stat) < 22) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot find 
start time in %s"), - filename); + _("Cannot find start time for pid %d"), (int)pid); return -1; } - tmp += 2; /* skip ') ' */ - if ((tmp - buf) >= len) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot find start time in %s"), - filename); - return -1; - } - - tokens = g_strsplit(tmp, " ", 0); - if (!tokens || - g_strv_length(tokens) < 20) { + if (virStrToLong_ull(proc_stat[21], NULL, 10, timestamp) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot find start time in %s"), - filename); + _("Cannot parse start time %s for pid %d"), + proc_stat[21], (int)pid); return -1; } - - if (virStrToLong_ull(tokens[19], - NULL, - 10, - timestamp) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot parse start time %s in %s"), - tokens[19], filename); - return -1; - } - return 0; } #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -- 2.34.0

On a Tuesday in 2021, Martin Kletzander wrote:
This eliminates one incorrect parsing implementation, which relied on the command field not containing a closing parenthesis. That case is already covered by the virProcessGetStat() tests.
Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_driver.c | 34 ++++++------------------------ src/util/virprocess.c | 48 ++++++------------------------------------ 2 files changed, 13 insertions(+), 69 deletions(-)
[..]
- tokens = g_strsplit(tmp, " ", 0);
- if (!tokens || - g_strv_length(tokens) < 20) { + if (virStrToLong_ull(proc_stat[21], NULL, 10, timestamp) < 0) {
VIR_PROCESS_STAT_STARTTIME
virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot find start time in %s"), - filename); + _("Cannot parse start time %s for pid %d"), + proc_stat[21], (int)pid); return -1; }
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano
participants (2)
-
Ján Tomko
-
Martin Kletzander