[PATCH 0/2] Fix /proc/*/stat parsing

While working on some polkit stuff I found out that we are inconsistent in the way we parse /proc/*/stat files, so I added a new helper along with some tests. Unfortunately, using it for the thing I originally wanted turned out not to be viable, so with only two callers converted it "violates" the Rule of three, but at least it parses the file correctly. Martin Kletzander (2): util: Add virProcessGetStat Use virProcessGetStat src/libvirt_linux.syms | 3 + src/qemu/qemu_driver.c | 29 ++---- src/util/virprocess.c | 126 +++++++++++++++++--------- src/util/virprocess.h | 4 + tests/meson.build | 1 + tests/virprocessstatdata/complex/stat | 2 + tests/virprocessstatdata/simple/stat | 1 + tests/virprocessstattest.c | 84 +++++++++++++++++ 8 files changed, 185 insertions(+), 65 deletions(-) create mode 100644 tests/virprocessstatdata/complex/stat create mode 100644 tests/virprocessstatdata/simple/stat create mode 100644 tests/virprocessstattest.c -- 2.34.0

This reads and separates all fields from /proc/<pid>/stat or /proc/<pid>/task/<tid>/stat as there are easy mistakes to be done in the implementation. Some tests are added to show it works correctly. No number parsing is done as it would be unused for most of the fields most, if not all, of the time. No struct is used for the result as the length can vary (new fields can be added in the future). Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/libvirt_linux.syms | 3 + src/util/virprocess.c | 78 +++++++++++++++++++++++++ src/util/virprocess.h | 4 ++ tests/meson.build | 1 + tests/virprocessstatdata/complex/stat | 2 + tests/virprocessstatdata/simple/stat | 1 + tests/virprocessstattest.c | 84 +++++++++++++++++++++++++++ 7 files changed, 173 insertions(+) create mode 100644 tests/virprocessstatdata/complex/stat create mode 100644 tests/virprocessstatdata/simple/stat create mode 100644 tests/virprocessstattest.c diff --git a/src/libvirt_linux.syms b/src/libvirt_linux.syms index 55649ae39cec..14422fae7286 100644 --- a/src/libvirt_linux.syms +++ b/src/libvirt_linux.syms @@ -10,6 +10,9 @@ virHostCPUGetSiblingsList; virHostCPUGetSocket; virHostCPUGetStatsLinux; +# util/virprocess.h +virProcessGetStat; + # Let emacs know we want case-insensitive sorting # Local Variables: # sort-fold-case: t diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 6de3f36f529c..4325f5116e8d 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1721,3 +1721,81 @@ virProcessSetScheduler(pid_t pid G_GNUC_UNUSED, } #endif /* !WITH_SCHED_SETSCHEDULER */ + +#ifdef __linux__ +/* + * Get all stat fields for a process based on pid and tid: + * - pid == 0 && tid == 0 => /proc/self/stat + * - pid != 0 && tid == 0 => /proc/<pid>/stat + * - pid == 0 && tid != 0 => /proc/self/task/<tid>/stat + * - pid != 0 && tid != 0 => /proc/<pid>/task/<tid>/stat + * and return them as array of strings. 
+ */ +GStrv +virProcessGetStat(pid_t pid, + pid_t tid) +{ + size_t buflen = 0; + g_autofree char *buf = NULL; + g_autofree char *path = NULL; + GStrv rest = NULL; + GStrv ret = NULL; + char *comm = NULL; + char *rparen = NULL; + size_t nrest = 0; + + if (pid) { + if (tid) + path = g_strdup_printf("/proc/%d/task/%d/stat", (int)pid, (int)tid); + else + path = g_strdup_printf("/proc/%d/stat", (int)pid); + } else { + if (tid) + path = g_strdup_printf("/proc/self/task/%d/stat", (int)tid); + else + path = g_strdup("/proc/self/stat"); + } + + if (!g_file_get_contents(path, &buf, &buflen, NULL)) + return NULL; + + /* eliminate trailing spaces */ + while (g_ascii_isspace(buf[--buflen])) + buf[buflen] = '\0'; + + /* Find end of the first field */ + if (!(comm = strchr(buf, ' '))) + return NULL; + *comm = '\0'; + + /* Check start of the second field (filename of the executable, in + * parentheses) */ + comm++; + if (*comm != '(') + return NULL; + comm++; + + /* Check end of the second field (last closing parenthesis) */ + rparen = strrchr(comm, ')'); + if (!rparen) + return NULL; + *rparen = '\0'; + + /* We need to check that the next char is not '\0', but why not just opt in + * for the safer way of checking whether it is ' ' (space) instead */ + if (rparen[1] != ' ') + return NULL; + + rest = g_strsplit(rparen + 2, " ", 0); + nrest = g_strv_length(rest); + ret = g_new0(char *, nrest + 3); + ret[0] = g_strdup(buf); + ret[1] = g_strdup(comm); + memcpy(ret + 2, rest, nrest * sizeof(char *)); + + /* Do not use g_strfreev() as individual elements they were moved to @ret. 
*/ + VIR_FREE(rest); + + return ret; +} +#endif diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 9910331a0caa..74dad1f3b15e 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -117,6 +117,10 @@ int virProcessSetupPrivateMountNS(void); int virProcessSetScheduler(pid_t pid, virProcessSchedPolicy policy, int priority); +#ifdef __linux__ +GStrv virProcessGetStat(pid_t pid, pid_t tid); +#endif + typedef enum { VIR_PROCESS_NAMESPACE_MNT = (1 << 1), VIR_PROCESS_NAMESPACE_IPC = (1 << 2), diff --git a/tests/meson.build b/tests/meson.build index 1948c07ae385..f75c24872086 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -347,6 +347,7 @@ if host_machine.system() == 'linux' { 'name': 'scsihosttest' }, { 'name': 'vircaps2xmltest', 'link_whole': [ test_file_wrapper_lib ] }, { 'name': 'virnetdevbandwidthtest' }, + { 'name': 'virprocessstattest', 'link_whole': [ test_file_wrapper_lib ] }, { 'name': 'virresctrltest', 'link_whole': [ test_file_wrapper_lib ] }, { 'name': 'virscsitest' }, { 'name': 'virusbtest' }, diff --git a/tests/virprocessstatdata/complex/stat b/tests/virprocessstatdata/complex/stat new file mode 100644 index 000000000000..df0ecc149c6f --- /dev/null +++ b/tests/virprocessstatdata/complex/stat @@ -0,0 +1,2 @@ +1 (this) is ( a weird ) +)( (command ( ) 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 diff --git a/tests/virprocessstatdata/simple/stat b/tests/virprocessstatdata/simple/stat new file mode 100644 index 000000000000..97d68735370d --- /dev/null +++ b/tests/virprocessstatdata/simple/stat @@ -0,0 +1 @@ +1 (command) 3 4 5 diff --git a/tests/virprocessstattest.c b/tests/virprocessstattest.c new file mode 100644 index 000000000000..c973dbc629d7 --- /dev/null +++ 
b/tests/virprocessstattest.c @@ -0,0 +1,84 @@ +#include <config.h> + +#include "testutils.h" +#include "virfilewrapper.h" +#include "virprocess.h" + + +struct testData { + const char *filename; + const char *command; + size_t count; + bool self; +}; + + +static int +test_virProcessGetStat(const void *opaque) +{ + struct testData *data = (struct testData *) opaque; + g_autofree char *data_dir = NULL; + g_auto(GStrv) proc_stat = NULL; + size_t len = 0; + id_t id = data->self ? 0 : -1; + + data_dir = g_strdup_printf("%s/virprocessstatdata/%s/", + abs_srcdir, data->filename); + + if (data->self) + virFileWrapperAddPrefix("/proc/self/", data_dir); + else + virFileWrapperAddPrefix("/proc/-1/task/-1/", data_dir); + + proc_stat = virProcessGetStat(id, id); + + virFileWrapperClearPrefixes(); + + if (!proc_stat) { + fprintf(stderr, "Could not get process stats\n"); + return -1; + } + + len = g_strv_length(proc_stat); + if (data->count != len) { + fprintf(stderr, "Count incorrect, expected %zu, got %zu\n", + data->count, len); + return -1; + } + + if (!STREQ_NULLABLE(data->command, proc_stat[1])) { + fprintf(stderr, "Command incorrect, expected %s, got %s\n", + data->command, proc_stat[1]); + return -1; + } + + return 0; +} + + +static int +mymain(void) +{ + struct testData data = {0}; + int ret = 0; + +#define DO_TEST(_filename, _command, _count, _self) \ + do { \ + data = (struct testData){ \ + .filename = _filename, \ + .command = _command, \ + .count = _count, \ + .self = _self, \ + }; \ + if (virTestRun("Reading process stat: " _filename, \ + test_virProcessGetStat, &data) < 0) \ + ret = -1; \ + } while (0) + + DO_TEST("simple", "command", 5, true); + DO_TEST("complex", "this) is ( a \t weird )\n)( (command ( ", 100, false); + + return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} + +VIR_TEST_MAIN(mymain) -- 2.34.0

This eliminates one incorrect parsing implementation. Signed-off-by: Martin Kletzander <mkletzan@redhat.com> --- src/qemu/qemu_driver.c | 29 ++++++------------------- src/util/virprocess.c | 48 ++++++------------------------------------ 2 files changed, 12 insertions(+), 65 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index d954635dde2a..511ff078ce98 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1399,36 +1399,19 @@ qemuGetSchedInfo(unsigned long long *cpuWait, static int qemuGetProcessInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss, - pid_t pid, int tid) + pid_t pid, pid_t tid) { g_autofree char *proc = NULL; FILE *pidinfo; unsigned long long usertime = 0, systime = 0; long rss = 0; int cpu = 0; + g_auto(GStrv) proc_stat = virProcessGetStat(pid, tid); - /* In general, we cannot assume pid_t fits in int; but /proc parsing - * is specific to Linux where int works fine. */ - if (tid) - proc = g_strdup_printf("/proc/%d/task/%d/stat", (int)pid, tid); - else - proc = g_strdup_printf("/proc/%d/stat", (int)pid); - if (!proc) - return -1; - - pidinfo = fopen(proc, "r"); - - /* See 'man proc' for information about what all these fields are. 
We're - * only interested in a very few of them */ - if (!pidinfo || - fscanf(pidinfo, - /* pid -> stime */ - "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %llu %llu" - /* cutime -> endcode */ - "%*d %*d %*d %*d %*d %*d %*u %*u %ld %*u %*u %*u" - /* startstack -> processor */ - "%*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*d %d", - &usertime, &systime, &rss, &cpu) != 4) { + if (virStrToLong_ullp(proc_stat[13], NULL, 10, &usertime) < 0 || + virStrToLong_ullp(proc_stat[14], NULL, 10, &systime) < 0 || + virStrToLong_l(proc_stat[23], NULL, 10, &rss) < 0 || + virStrToLong_i(proc_stat[38], NULL, 10, &cpu) < 0) { VIR_WARN("cannot parse process status data"); } diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 4325f5116e8d..6b60b979d2d5 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1153,56 +1153,20 @@ virProcessSetMaxCoreSize(pid_t pid G_GNUC_UNUSED, int virProcessGetStartTime(pid_t pid, unsigned long long *timestamp) { - char *tmp; - int len; - g_autofree char *filename = NULL; - g_autofree char *buf = NULL; - g_auto(GStrv) tokens = NULL; - - filename = g_strdup_printf("/proc/%llu/stat", (long long)pid); - - if ((len = virFileReadAll(filename, 1024, &buf)) < 0) - return -1; + g_auto(GStrv) proc_stat = virProcessGetStat(pid, 0); - /* start time is the token at index 19 after the '(process name)' entry - since only this - * field can contain the ')' character, search backwards for this to avoid malicious - * processes trying to fool us - */ - - if (!(tmp = strrchr(buf, ')'))) { + if (!proc_stat || g_strv_length(proc_stat) < 22) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot find start time in %s"), - filename); + _("Cannot find start time for pid %d"), (int)pid); return -1; } - tmp += 2; /* skip ') ' */ - if ((tmp - buf) >= len) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot find start time in %s"), - filename); - return -1; - } - - tokens = g_strsplit(tmp, " ", 0); - if (!tokens || - g_strv_length(tokens) < 20) { + 
if (virStrToLong_ull(proc_stat[21], NULL, 10, timestamp) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot find start time in %s"), - filename); + _("Cannot parse start time %s for pid %d"), + proc_stat[21], (int)pid); return -1; } - - if (virStrToLong_ull(tokens[19], - NULL, - 10, - timestamp) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot parse start time %s in %s"), - tokens[19], filename); - return -1; - } - return 0; } #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -- 2.34.0

On Fri, Nov 19, 2021 at 06:31:34PM +0100, Martin Kletzander wrote:
While working on some polkit stuff I found out that we are inconsistent in the way we parse /proc/*/stat files, so I added a new helper along with some tests. Unfortunately, using it for the thing I originally wanted turned out not to be viable, so with only two callers converted it "violates" the Rule of three, but at least it parses the file correctly.
Self-NACK, v2 coming up.
Martin Kletzander (2): util: Add virProcessGetStat Use virProcessGetStat
src/libvirt_linux.syms | 3 + src/qemu/qemu_driver.c | 29 ++---- src/util/virprocess.c | 126 +++++++++++++++++--------- src/util/virprocess.h | 4 + tests/meson.build | 1 + tests/virprocessstatdata/complex/stat | 2 + tests/virprocessstatdata/simple/stat | 1 + tests/virprocessstattest.c | 84 +++++++++++++++++ 8 files changed, 185 insertions(+), 65 deletions(-) create mode 100644 tests/virprocessstatdata/complex/stat create mode 100644 tests/virprocessstatdata/simple/stat create mode 100644 tests/virprocessstattest.c
-- 2.34.0
participants (1)
-
Martin Kletzander