[libvirt] [PATCH v2 0/4] Optimize mass closing of FDs on child spawning

v2 of: https://www.redhat.com/archives/libvir-list/2019-July/msg00089.html diff to v1: - Added a test for stats parsing, - Added big fat warning as requested by Eric that malloc() in between fork() and exec() is okay with glibc (I guess we don't care about other libc-s like musl or uclibc). Michal Prívozník (4): virNetDevOpenvswitchInterfaceStats: Optimize for speed test: Introduce virnetdevopenvswitchtest vircommand: Separate mass FD closing into a function virCommand: use procfs to learn opened FDs src/libvirt_private.syms | 1 + src/util/vircommand.c | 122 +++++++++++++++--- src/util/virnetdevopenvswitch.c | 136 +++++++++++++++------ src/util/virnetdevopenvswitch.h | 4 + tests/Makefile.am | 13 +- tests/virnetdevopenvswitchdata/stats1.json | 1 + tests/virnetdevopenvswitchdata/stats2.json | 1 + tests/virnetdevopenvswitchtest.c | 101 +++++++++++++++ 8 files changed, 321 insertions(+), 58 deletions(-) create mode 100644 tests/virnetdevopenvswitchdata/stats1.json create mode 100644 tests/virnetdevopenvswitchdata/stats2.json create mode 100644 tests/virnetdevopenvswitchtest.c -- 2.21.0

We run 'ovs-vsctl' nine times (first to find if interface is there and then eight times = for each stats member separately). This is very inefficient. I've found a way to run it once and with a bit of help from virJSON module we can parse out stats we need. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virnetdevopenvswitch.c | 111 +++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 37 deletions(-) diff --git a/src/util/virnetdevopenvswitch.c b/src/util/virnetdevopenvswitch.c index c99ecfbf15..0fe64bedab 100644 --- a/src/util/virnetdevopenvswitch.c +++ b/src/util/virnetdevopenvswitch.c @@ -28,6 +28,7 @@ #include "virmacaddr.h" #include "virstring.h" #include "virlog.h" +#include "virjson.h" #define VIR_FROM_THIS VIR_FROM_NONE @@ -311,58 +312,94 @@ int virNetDevOpenvswitchInterfaceStats(const char *ifname, virDomainInterfaceStatsPtr stats) { - char *tmp; - bool gotStats = false; VIR_AUTOPTR(virCommand) cmd = NULL; VIR_AUTOFREE(char *) output = NULL; + VIR_AUTOPTR(virJSONValue) jsonStats = NULL; + virJSONValuePtr jsonMap = NULL; + size_t i; - /* Just ensure the interface exists in ovs */ cmd = virCommandNew(OVSVSCTL); virNetDevOpenvswitchAddTimeout(cmd); - virCommandAddArgList(cmd, "get", "Interface", ifname, "name", NULL); + virCommandAddArgList(cmd, "--if-exists", "--format=list", "--data=json", + "--no-headings", "--columns=statistics", "list", + "Interface", ifname, NULL); virCommandSetOutputBuffer(cmd, &output); - if (virCommandRun(cmd, NULL) < 0) { + /* The above command returns either: + * 1) empty string if @ifname doesn't exist, or + * 2) a JSON array, for instance: + * ["map",[["collisions",0],["rx_bytes",0],["rx_crc_err",0],["rx_dropped",0], + * ["rx_errors",0],["rx_frame_err",0],["rx_over_err",0],["rx_packets",0], + * ["tx_bytes",12406],["tx_dropped",0],["tx_errors",0],["tx_packets",173]]] + */ + + if (virCommandRun(cmd, NULL) < 0 || + STREQ_NULLABLE(output, "")) { /* no ovs-vsctl or interface 'ifname' doesn't exists in 
ovs */ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Interface not found")); return -1; } -#define GET_STAT(name, member) \ - do { \ - VIR_FREE(output); \ - virCommandFree(cmd); \ - cmd = virCommandNew(OVSVSCTL); \ - virNetDevOpenvswitchAddTimeout(cmd); \ - virCommandAddArgList(cmd, "--if-exists", "get", "Interface", \ - ifname, "statistics:" name, NULL); \ - virCommandSetOutputBuffer(cmd, &output); \ - if (virCommandRun(cmd, NULL) < 0 || !output || !*output || *output == '\n') { \ - stats->member = -1; \ - } else { \ - if (virStrToLong_ll(output, &tmp, 10, &stats->member) < 0 || \ - *tmp != '\n') { \ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", \ - _("Fail to parse ovs-vsctl output")); \ - return -1; \ - } \ - gotStats = true; \ - } \ - } while (0) + if (!(jsonStats = virJSONValueFromString(output)) || + !virJSONValueIsArray(jsonStats) || + !(jsonMap = virJSONValueArrayGet(jsonStats, 1))) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unable to parse ovs-vsctl output")); + return -1; + } - /* The TX/RX fields appear to be swapped here - * because this is the host view. 
*/ - GET_STAT("rx_bytes", tx_bytes); - GET_STAT("rx_packets", tx_packets); - GET_STAT("rx_errors", tx_errs); - GET_STAT("rx_dropped", tx_drop); - GET_STAT("tx_bytes", rx_bytes); - GET_STAT("tx_packets", rx_packets); - GET_STAT("tx_errors", rx_errs); - GET_STAT("tx_dropped", rx_drop); + stats->rx_bytes = stats->rx_packets = stats->rx_errs = stats->rx_drop = -1; + stats->tx_bytes = stats->tx_packets = stats->tx_errs = stats->tx_drop = -1; - if (!gotStats) { + for (i = 0; i < virJSONValueArraySize(jsonMap); i++) { + virJSONValuePtr item = virJSONValueArrayGet(jsonMap, i); + virJSONValuePtr jsonKey; + virJSONValuePtr jsonVal; + const char *key; + long long val; + + if (!item || + (!(jsonKey = virJSONValueArrayGet(item, 0))) || + (!(jsonVal = virJSONValueArrayGet(item, 1))) || + (!(key = virJSONValueGetString(jsonKey))) || + (virJSONValueGetNumberLong(jsonVal, &val) < 0)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Malformed ovs-vsctl output")); + return -1; + } + + /* The TX/RX fields appear to be swapped here + * because this is the host view. */ + if (STREQ(key, "rx_bytes")) { + stats->tx_bytes = val; + } else if (STREQ(key, "rx_packets")) { + stats->tx_packets = val; + } else if (STREQ(key, "rx_errors")) { + stats->tx_errs = val; + } else if (STREQ(key, "rx_dropped")) { + stats->tx_drop = val; + } else if (STREQ(key, "tx_bytes")) { + stats->rx_bytes = val; + } else if (STREQ(key, "tx_packets")) { + stats->rx_packets = val; + } else if (STREQ(key, "tx_errors")) { + stats->rx_errs = val; + } else if (STREQ(key, "tx_dropped")) { + stats->rx_drop = val; + } else { + VIR_DEBUG("Unused ovs-vsctl stat key=%s val=%lld", key, val); + } + } + + if (stats->rx_bytes == -1 && + stats->rx_packets == -1 && + stats->rx_errs == -1 && + stats->rx_drop == -1 && + stats->tx_bytes == -1 && + stats->tx_packets == -1 && + stats->tx_errs == -1 && + stats->tx_drop == -1) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Interface doesn't have any statistics")); return -1; -- 2.21.0

On Tue, Jul 16, 2019 at 10:54:33AM +0200, Michal Privoznik wrote:
We run 'ovs-vsctl' nine times (first to find if interface is there and then eight times = for each stats member separately). This is very inefficient. I've found a way to run it once and with a bit of help from virJSON module we can parse out stats we need.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virnetdevopenvswitch.c | 111 +++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 37 deletions(-)
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano

Test if our parsing of interface stats as returned by ovs-vsctl works as expected. To achieve this without having to mock virCommand* I'm separating parsing of stats into a separate function. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virnetdevopenvswitch.c | 93 ++++++++++++------- src/util/virnetdevopenvswitch.h | 4 + tests/Makefile.am | 13 ++- tests/virnetdevopenvswitchdata/stats1.json | 1 + tests/virnetdevopenvswitchdata/stats2.json | 1 + tests/virnetdevopenvswitchtest.c | 101 +++++++++++++++++++++ 7 files changed, 177 insertions(+), 37 deletions(-) create mode 100644 tests/virnetdevopenvswitchdata/stats1.json create mode 100644 tests/virnetdevopenvswitchdata/stats2.json create mode 100644 tests/virnetdevopenvswitchtest.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7dfa5af3b3..4e77cf53ea 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2501,6 +2501,7 @@ virNetDevOpenvswitchAddPort; virNetDevOpenvswitchGetMigrateData; virNetDevOpenvswitchGetVhostuserIfname; virNetDevOpenvswitchInterfaceGetMaster; +virNetDevOpenvswitchInterfaceParseStats; virNetDevOpenvswitchInterfaceStats; virNetDevOpenvswitchRemovePort; virNetDevOpenvswitchSetMigrateData; diff --git a/src/util/virnetdevopenvswitch.c b/src/util/virnetdevopenvswitch.c index 0fe64bedab..2afc30f485 100644 --- a/src/util/virnetdevopenvswitch.c +++ b/src/util/virnetdevopenvswitch.c @@ -299,49 +299,30 @@ int virNetDevOpenvswitchSetMigrateData(char *migrate, const char *ifname) return 0; } + /** - * virNetDevOpenvswitchInterfaceStats: - * @ifname: the name of the interface - * @stats: the retrieved domain interface stat + * virNetDevOpenvswitchInterfaceParseStats: + * @json: Input string in JSON format + * @stats: parsed stats * - * Retrieves the OVS interfaces stats + * For given input string @json parse interface statistics and store them into + * @stats. 
* - * Returns 0 in case of success or -1 in case of failure + * Returns: 0 on success, + * -1 otherwise (with error reported). */ int -virNetDevOpenvswitchInterfaceStats(const char *ifname, - virDomainInterfaceStatsPtr stats) +virNetDevOpenvswitchInterfaceParseStats(const char *json, + virDomainInterfaceStatsPtr stats) { - VIR_AUTOPTR(virCommand) cmd = NULL; - VIR_AUTOFREE(char *) output = NULL; VIR_AUTOPTR(virJSONValue) jsonStats = NULL; virJSONValuePtr jsonMap = NULL; size_t i; - cmd = virCommandNew(OVSVSCTL); - virNetDevOpenvswitchAddTimeout(cmd); - virCommandAddArgList(cmd, "--if-exists", "--format=list", "--data=json", - "--no-headings", "--columns=statistics", "list", - "Interface", ifname, NULL); - virCommandSetOutputBuffer(cmd, &output); + stats->rx_bytes = stats->rx_packets = stats->rx_errs = stats->rx_drop = -1; + stats->tx_bytes = stats->tx_packets = stats->tx_errs = stats->tx_drop = -1; - /* The above command returns either: - * 1) empty string if @ifname doesn't exist, or - * 2) a JSON array, for instance: - * ["map",[["collisions",0],["rx_bytes",0],["rx_crc_err",0],["rx_dropped",0], - * ["rx_errors",0],["rx_frame_err",0],["rx_over_err",0],["rx_packets",0], - * ["tx_bytes",12406],["tx_dropped",0],["tx_errors",0],["tx_packets",173]]] - */ - - if (virCommandRun(cmd, NULL) < 0 || - STREQ_NULLABLE(output, "")) { - /* no ovs-vsctl or interface 'ifname' doesn't exists in ovs */ - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Interface not found")); - return -1; - } - - if (!(jsonStats = virJSONValueFromString(output)) || + if (!(jsonStats = virJSONValueFromString(json)) || !virJSONValueIsArray(jsonStats) || !(jsonMap = virJSONValueArrayGet(jsonStats, 1))) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -349,9 +330,6 @@ virNetDevOpenvswitchInterfaceStats(const char *ifname, return -1; } - stats->rx_bytes = stats->rx_packets = stats->rx_errs = stats->rx_drop = -1; - stats->tx_bytes = stats->tx_packets = stats->tx_errs = stats->tx_drop = -1; - for (i = 0; 
i < virJSONValueArraySize(jsonMap); i++) { virJSONValuePtr item = virJSONValueArrayGet(jsonMap, i); virJSONValuePtr jsonKey; @@ -392,6 +370,51 @@ virNetDevOpenvswitchInterfaceStats(const char *ifname, } } + return 0; +} + +/** + * virNetDevOpenvswitchInterfaceStats: + * @ifname: the name of the interface + * @stats: the retrieved domain interface stat + * + * Retrieves the OVS interfaces stats + * + * Returns 0 in case of success or -1 in case of failure + */ +int +virNetDevOpenvswitchInterfaceStats(const char *ifname, + virDomainInterfaceStatsPtr stats) +{ + VIR_AUTOPTR(virCommand) cmd = NULL; + VIR_AUTOFREE(char *) output = NULL; + + cmd = virCommandNew(OVSVSCTL); + virNetDevOpenvswitchAddTimeout(cmd); + virCommandAddArgList(cmd, "--if-exists", "--format=list", "--data=json", + "--no-headings", "--columns=statistics", "list", + "Interface", ifname, NULL); + virCommandSetOutputBuffer(cmd, &output); + + /* The above command returns either: + * 1) empty string if @ifname doesn't exist, or + * 2) a JSON array, for instance: + * ["map",[["collisions",0],["rx_bytes",0],["rx_crc_err",0],["rx_dropped",0], + * ["rx_errors",0],["rx_frame_err",0],["rx_over_err",0],["rx_packets",0], + * ["tx_bytes",12406],["tx_dropped",0],["tx_errors",0],["tx_packets",173]]] + */ + + if (virCommandRun(cmd, NULL) < 0 || + STREQ_NULLABLE(output, "")) { + /* no ovs-vsctl or interface 'ifname' doesn't exists in ovs */ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Interface not found")); + return -1; + } + + if (virNetDevOpenvswitchInterfaceParseStats(output, stats) < 0) + return -1; + if (stats->rx_bytes == -1 && stats->rx_packets == -1 && stats->rx_errs == -1 && diff --git a/src/util/virnetdevopenvswitch.h b/src/util/virnetdevopenvswitch.h index 07496fb07d..5bc18f851f 100644 --- a/src/util/virnetdevopenvswitch.h +++ b/src/util/virnetdevopenvswitch.h @@ -49,6 +49,10 @@ int virNetDevOpenvswitchGetMigrateData(char **migrate, const char *ifname) int virNetDevOpenvswitchSetMigrateData(char 
*migrate, const char *ifname) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +int virNetDevOpenvswitchInterfaceParseStats(const char *json, + virDomainInterfaceStatsPtr stats) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; + int virNetDevOpenvswitchInterfaceStats(const char *ifname, virDomainInterfaceStatsPtr stats) ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; diff --git a/tests/Makefile.am b/tests/Makefile.am index 107f2de859..2cb78c1310 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -147,6 +147,7 @@ EXTRA_DIST = \ virmockstathelpers.c \ virnetdaemondata \ virnetdevtestdata \ + virnetdevopenvswitchdata \ virnetworkportxml2xmldata \ virnwfilterbindingxml2xmldata \ virpcitestdata \ @@ -1271,9 +1272,17 @@ virmacmaptest_SOURCES = \ virmacmaptest.c testutils.h testutils.c virmacmaptest_LDADD = $(LDADDS) -test_programs += virmacmaptest +virnetdevopenvswitchtest_SOURCES = \ + virnetdevopenvswitchtest.c testutils.h testutils.c +virnetdevopenvswitchtest_LDADD = $(LDADDS) + +test_programs += \ + virmacmaptest \ + virnetdevopenvswitchtest else ! WITH_YAJL -EXTRA_DIST += virmacmaptest.c +EXTRA_DIST += \ + virmacmaptest.c \ + virnetdevopenvswitchtest.c endif ! 
WITH_YAJL virnetdevtest_SOURCES = \ diff --git a/tests/virnetdevopenvswitchdata/stats1.json b/tests/virnetdevopenvswitchdata/stats1.json new file mode 100644 index 0000000000..1138c6271e --- /dev/null +++ b/tests/virnetdevopenvswitchdata/stats1.json @@ -0,0 +1 @@ +["map",[["collisions",1],["rx_bytes",2],["rx_crc_err",3],["rx_dropped",4],["rx_errors",5],["rx_frame_err",6],["rx_over_err",7],["rx_packets",8],["tx_bytes",9],["tx_dropped",10],["tx_errors",11],["tx_packets",12]]] diff --git a/tests/virnetdevopenvswitchdata/stats2.json b/tests/virnetdevopenvswitchdata/stats2.json new file mode 100644 index 0000000000..d84be7e011 --- /dev/null +++ b/tests/virnetdevopenvswitchdata/stats2.json @@ -0,0 +1 @@ +["map",[["collisions",0],["rx_bytes",0],["rx_crc_err",0],["rx_dropped",0],["rx_errors",0],["rx_frame_err",0],["rx_over_err",0],["rx_packets",0],["tx_bytes",12406],["tx_dropped",0],["tx_errors",0],["tx_packets",173]]] diff --git a/tests/virnetdevopenvswitchtest.c b/tests/virnetdevopenvswitchtest.c new file mode 100644 index 0000000000..f01e77cbba --- /dev/null +++ b/tests/virnetdevopenvswitchtest.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2019 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#include "testutils.h" +#include "virnetdevopenvswitch.h" + +#define VIR_FROM_THIS VIR_FROM_NONE + +typedef struct _InterfaceParseStatsData InterfaceParseStatsData; +struct _InterfaceParseStatsData { + const char *filename; + const virDomainInterfaceStatsStruct stats; +}; + + +static int +testInterfaceParseStats(const void *opaque) +{ + const InterfaceParseStatsData *data = opaque; + VIR_AUTOFREE(char *) filename = NULL; + VIR_AUTOFREE(char *) buf = NULL; + virDomainInterfaceStatsStruct actual; + + if (virAsprintf(&filename, "%s/virnetdevopenvswitchdata/%s", + abs_srcdir, data->filename) < 0) + return -1; + + if (virFileReadAll(filename, 1024, &buf) < 0) + return -1; + + if (virNetDevOpenvswitchInterfaceParseStats(buf, &actual) < 0) + return -1; + + if (memcmp(&actual, &data->stats, sizeof(actual)) != 0) { + fprintf(stderr, + "Expected stats: %lld %lld %lld %lld %lld %lld %lld %lld\n" + "Actual stats: %lld %lld %lld %lld %lld %lld %lld %lld", + data->stats.rx_bytes, + data->stats.rx_packets, + data->stats.rx_errs, + data->stats.rx_drop, + data->stats.tx_bytes, + data->stats.tx_packets, + data->stats.tx_errs, + data->stats.tx_drop, + actual.rx_bytes, + actual.rx_packets, + actual.rx_errs, + actual.rx_drop, + actual.tx_bytes, + actual.tx_packets, + actual.tx_errs, + actual.tx_drop); + + return -1; + } + + return 0; +} + + +static int +mymain(void) +{ + int ret = 0; + +#define TEST_INTERFACE_STATS(file, \ + rxBytes, rxPackets, rxErrs, rxDrop, \ + txBytes, txPackets, txErrs, txDrop) \ + do { \ + const InterfaceParseStatsData data = {.filename = file, .stats = { \ + rxBytes, rxPackets, rxErrs, rxDrop, \ + txBytes, txPackets, txErrs, txDrop}}; \ + if (virTestRun("Interface stats " file, testInterfaceParseStats, &data) < 0) \ + ret = -1; \ + } while (0) + + TEST_INTERFACE_STATS("stats1.json", 9, 12, 11, 10, 2, 8, 5, 4); + TEST_INTERFACE_STATS("stats2.json", 12406, 173, 0, 0, 0, 0, 0, 0); + + return ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE; +} + +VIR_TEST_MAIN(mymain); -- 2.21.0

On Tue, Jul 16, 2019 at 10:54:34AM +0200, Michal Privoznik wrote:
Test if our parsing of interface stats as returned by ovs-vsctl works as expected. To achieve this without having to mock virCommand* I'm separating parsing of stats into a separate function.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virnetdevopenvswitch.c | 93 ++++++++++++------- src/util/virnetdevopenvswitch.h | 4 + tests/Makefile.am | 13 ++- tests/virnetdevopenvswitchdata/stats1.json | 1 + tests/virnetdevopenvswitchdata/stats2.json | 1 + tests/virnetdevopenvswitchtest.c | 101 +++++++++++++++++++++ 7 files changed, 177 insertions(+), 37 deletions(-) create mode 100644 tests/virnetdevopenvswitchdata/stats1.json create mode 100644 tests/virnetdevopenvswitchdata/stats2.json create mode 100644 tests/virnetdevopenvswitchtest.c
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano

I will optimize this code a bit in the next commit. But for that it is better if the code lives in a separate function. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> Reviewed-by: Ján Tomko <jtomko@redhat.com> --- src/util/vircommand.c | 52 ++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/src/util/vircommand.c b/src/util/vircommand.c index c81ddfc0d0..6cd7cbe065 100644 --- a/src/util/vircommand.c +++ b/src/util/vircommand.c @@ -418,6 +418,37 @@ virExecCommon(virCommandPtr cmd, gid_t *groups, int ngroups) return ret; } +static int +virCommandMassClose(virCommandPtr cmd, + int childin, + int childout, + int childerr) +{ + int openmax = sysconf(_SC_OPEN_MAX); + int fd; + int tmpfd; + + if (openmax < 0) { + virReportSystemError(errno, "%s", + _("sysconf(_SC_OPEN_MAX) failed")); + return -1; + } + + for (fd = 3; fd < openmax; fd++) { + if (fd == childin || fd == childout || fd == childerr) + continue; + if (!virCommandFDIsSet(cmd, fd)) { + tmpfd = fd; + VIR_MASS_CLOSE(tmpfd); + } else if (virSetInherit(fd, true) < 0) { + virReportSystemError(errno, _("failed to preserve fd %d"), fd); + return -1; + } + } + + return 0; +} + /* * virExec: * @cmd virCommandPtr containing all information about the program to @@ -427,13 +458,12 @@ static int virExec(virCommandPtr cmd) { pid_t pid; - int null = -1, fd, openmax; + int null = -1; int pipeout[2] = {-1, -1}; int pipeerr[2] = {-1, -1}; int childin = cmd->infd; int childout = -1; int childerr = -1; - int tmpfd; VIR_AUTOFREE(char *) binarystr = NULL; const char *binary = NULL; int ret; @@ -539,23 +569,9 @@ virExec(virCommandPtr cmd) if (cmd->mask) umask(cmd->mask); ret = EXIT_CANCELED; - openmax = sysconf(_SC_OPEN_MAX); - if (openmax < 0) { - virReportSystemError(errno, "%s", - _("sysconf(_SC_OPEN_MAX) failed")); + + if (virCommandMassClose(cmd, childin, childout, childerr) < 0) goto fork_error; - } - for (fd = 3; fd < openmax; fd++) { - if (fd == childin || fd == 
childout || fd == childerr) - continue; - if (!virCommandFDIsSet(cmd, fd)) { - tmpfd = fd; - VIR_MASS_CLOSE(tmpfd); - } else if (virSetInherit(fd, true) < 0) { - virReportSystemError(errno, _("failed to preserve fd %d"), fd); - goto fork_error; - } - } if (prepareStdFd(childin, STDIN_FILENO) < 0) { virReportSystemError(errno, -- 2.21.0

When spawning a child process, between fork() and exec() we close all file descriptors and keep only those the caller wants us to pass onto the child. The problem is how we do that. Currently, we get the limit of opened files and then iterate through each one of them and either close() it or make it survive exec(). This approach is suboptimal (although, not that much in default configurations where the limit is pretty low - 1024). We have /proc where we can learn what FDs we hold open and thus we can selectively close only those. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/vircommand.c | 86 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 8 deletions(-) diff --git a/src/util/vircommand.c b/src/util/vircommand.c index 6cd7cbe065..bfc6c15cfb 100644 --- a/src/util/vircommand.c +++ b/src/util/vircommand.c @@ -418,27 +418,97 @@ virExecCommon(virCommandPtr cmd, gid_t *groups, int ngroups) return ret; } +# ifdef __linux__ +/* On Linux, we can utilize procfs and read the table of opened + * FDs and selectively close only those FDs we don't want to pass + * onto child process (well, the one we will exec soon since this + * is called from the child). 
*/ +static int +virCommandMassCloseGetFDsLinux(virCommandPtr cmd ATTRIBUTE_UNUSED, + virBitmapPtr fds) +{ + DIR *dp = NULL; + struct dirent *entry; + const char *dirName = "/proc/self/fd"; + int rc; + int ret = -1; + + if (virDirOpen(&dp, dirName) < 0) + return -1; + + while ((rc = virDirRead(dp, &entry, dirName)) > 0) { + int fd; + + if (virStrToLong_i(entry->d_name, NULL, 10, &fd) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to parse FD: %s"), + entry->d_name); + goto cleanup; + } + + if (virBitmapSetBit(fds, fd) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to set FD as open: %d"), + fd); + goto cleanup; + } + } + + if (rc < 0) + goto cleanup; + + ret = 0; + cleanup: + VIR_DIR_CLOSE(dp); + return ret; +} + +# else /* !__linux__ */ + +static int +virCommandMassCloseGetFDsGeneric(virCommandPtr cmd ATTRIBUTE_UNUSED, + virBitmapPtr fds) +{ + virBitmapSetAll(fds); + return 0; +} +# endif /* !__linux__ */ + static int virCommandMassClose(virCommandPtr cmd, int childin, int childout, int childerr) { + VIR_AUTOPTR(virBitmap) fds = NULL; int openmax = sysconf(_SC_OPEN_MAX); - int fd; - int tmpfd; + int fd = -1; - if (openmax < 0) { - virReportSystemError(errno, "%s", - _("sysconf(_SC_OPEN_MAX) failed")); + /* In general, it is not save to call malloc() between fork() and exec() + * because the child might have forked at the worst possible time, i.e. + * when another thread was in malloc() and thus held its lock. That is to + * say, POSIX does not mandate malloc() to be async-safe. Fortunately, + * glibc developers are aware of this and made malloc() async-safe. + * Therefore we can safely allocate memory here (and transitively call + * opendir/readdir) without a deadlock. 
*/ + + if (!(fds = virBitmapNew(openmax))) + return -1; + +# ifdef __linux__ + if (virCommandMassCloseGetFDsLinux(cmd, fds) < 0) + return -1; +# else + if (virCommandMassCloseGetFDsGeneric(cmd, fds) < 0) return -1; - } +# endif - for (fd = 3; fd < openmax; fd++) { + fd = virBitmapNextSetBit(fds, -1); + for (; fd >= 0; fd = virBitmapNextSetBit(fds, fd)) { if (fd == childin || fd == childout || fd == childerr) continue; if (!virCommandFDIsSet(cmd, fd)) { - tmpfd = fd; + int tmpfd = fd; VIR_MASS_CLOSE(tmpfd); } else if (virSetInherit(fd, true) < 0) { virReportSystemError(errno, _("failed to preserve fd %d"), fd); -- 2.21.0

On Tue, Jul 16, 2019 at 10:54:36AM +0200, Michal Privoznik wrote:
When spawning a child process, between fork() and exec() we close all file descriptors and keep only those the caller wants us to pass onto the child. The problem is how we do that. Currently, we get the limit of opened files and then iterate through each one of them and either close() it or make it survive exec(). This approach is suboptimal (although, not that much in default configurations where the limit is pretty low - 1024). We have /proc where we can learn what FDs we hold open and thus we can selectively close only those.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/vircommand.c | 86 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 8 deletions(-)
diff --git a/src/util/vircommand.c b/src/util/vircommand.c index 6cd7cbe065..bfc6c15cfb 100644 --- a/src/util/vircommand.c +++ b/src/util/vircommand.c @@ -418,27 +418,97 @@ virExecCommon(virCommandPtr cmd, gid_t *groups, int ngroups) static int virCommandMassClose(virCommandPtr cmd, int childin, int childout, int childerr) { + VIR_AUTOPTR(virBitmap) fds = NULL; int openmax = sysconf(_SC_OPEN_MAX); - int fd; - int tmpfd; + int fd = -1;
- if (openmax < 0) { - virReportSystemError(errno, "%s", - _("sysconf(_SC_OPEN_MAX) failed")); + /* In general, it is not save to call malloc() between fork() and exec()
s/save/safe/
+ * because the child might have forked at the worst possible time, i.e. + * when another thread was in malloc() and thus held its lock. That is to + * say, POSIX does not mandate malloc() to be async-safe. Fortunately, + * glibc developers are aware of this and made malloc() async-safe. + * Therefore we can safely allocate memory here (and transitively call + * opendir/readdir) without a deadlock. */ + + if (!(fds = virBitmapNew(openmax))) + return -1; + +# ifdef __linux__ + if (virCommandMassCloseGetFDsLinux(cmd, fds) < 0) + return -1; +# else + if (virCommandMassCloseGetFDsGeneric(cmd, fds) < 0) return -1; - } +# endif
- for (fd = 3; fd < openmax; fd++) { + fd = virBitmapNextSetBit(fds, -1); + for (; fd >= 0; fd = virBitmapNextSetBit(fds, fd)) {
fd >= 3 to make it match the previous behavior
if (fd == childin || fd == childout || fd == childerr) continue; if (!virCommandFDIsSet(cmd, fd)) { - tmpfd = fd; + int tmpfd = fd; VIR_MASS_CLOSE(tmpfd);
Reviewed-by: Ján Tomko <jtomko@redhat.com> Jano
participants (2)
-
Ján Tomko
-
Michal Privoznik