[PATCH 0/4] qemu: Fix saving of VMs and certain migration cases on root_squash NFS

See 3/4 for explanation. Peter Krempa (4): virfile: Modernize definition of virFileOpenForked/virFileOpenForceOwnerMode/virFileOpenAs virGetGroupList: Refactor and fix callers virFileOpenForked: Fix handling of return value from virSocketSendFD() NEWS: Mention migration/save bug on root_squash NFS NEWS.rst | 7 +++++ src/lxc/lxc_container.c | 4 +-- src/security/security_dac.c | 7 +---- src/util/vircommand.c | 3 +- src/util/virfile.c | 51 +++++++++++++++++---------------- src/util/virutil.c | 16 +++++------ tests/commandtest.c | 5 ++-- tools/virt-login-shell-helper.c | 3 +- 8 files changed, 49 insertions(+), 47 deletions(-) -- 2.45.1

Declare one argument per line and one variable per line and use boolean operators at the end of the line rather than at the beginning. Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/util/virfile.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/util/virfile.c b/src/util/virfile.c index c769f7d650..f66ecd29a2 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -2330,8 +2330,12 @@ virFileAccessibleAs(const char *path, int mode, * opened as "fd" if it's not correct AND the flags say it should be * forced. */ static int -virFileOpenForceOwnerMode(const char *path, int fd, mode_t mode, - uid_t uid, gid_t gid, unsigned int flags) +virFileOpenForceOwnerMode(const char *path, + int fd, + mode_t mode, + uid_t uid, + gid_t gid, + unsigned int flags) { int ret = 0; struct stat st; @@ -2381,11 +2385,16 @@ virFileOpenForceOwnerMode(const char *path, int fd, mode_t mode, * buildVol backend function expects the file to be deleted on error. */ static int -virFileOpenForked(const char *path, int openflags, mode_t mode, - uid_t uid, gid_t gid, unsigned int flags) +virFileOpenForked(const char *path, + int openflags, + mode_t mode, + uid_t uid, + gid_t gid, + unsigned int flags) { pid_t pid; - int status = 0, ret = 0; + int status = 0; + int ret = 0; int recvfd_errno = 0; int fd = -1; int pair[2] = { -1, -1 }; @@ -2546,10 +2555,15 @@ virFileOpenForked(const char *path, int openflags, mode_t mode, * expects the file to be deleted on error. */ int -virFileOpenAs(const char *path, int openflags, mode_t mode, - uid_t uid, gid_t gid, unsigned int flags) +virFileOpenAs(const char *path, + int openflags, + mode_t mode, + uid_t uid, + gid_t gid, + unsigned int flags) { - int ret = 0, fd = -1; + int ret = 0; + int fd = -1; bool created = false; /* allow using -1 to mean "current value" */ @@ -2563,9 +2577,9 @@ virFileOpenAs(const char *path, int openflags, mode_t mode, if (!(flags & (VIR_FILE_OPEN_NOFORK|VIR_FILE_OPEN_FORK))) flags |= VIR_FILE_OPEN_NOFORK|VIR_FILE_OPEN_FORK; - if ((flags & VIR_FILE_OPEN_NOFORK) - || (geteuid() != 0) - || ((uid == 0) && (gid == 0))) { + if ((flags & VIR_FILE_OPEN_NOFORK) || + (geteuid() != 0) || + ((uid == 0) && (gid == 0))) { if ((fd = open(path, openflags, mode)) < 0) { ret = -errno; -- 2.45.1

On Wed, May 22, 2024 at 18:00:16 +0200, Peter Krempa wrote:
Declare one argument per line and one variable per line and use boolean operators at the end of the line rather than at the beginning.
Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/util/virfile.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-)
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>

Use contemporary style for declarations and automatic memory clearing for a helper string. Since the function can't fail any more, remove any mention of returning errno and remove error checks from all callers. Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/lxc/lxc_container.c | 4 ++-- src/security/security_dac.c | 7 +------ src/util/vircommand.c | 3 +-- src/util/virfile.c | 8 -------- src/util/virutil.c | 16 ++++++++-------- tests/commandtest.c | 5 ++--- tools/virt-login-shell-helper.c | 3 +-- 7 files changed, 15 insertions(+), 31 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 652697890f..7e460544fb 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -2059,8 +2059,8 @@ static int lxcContainerChild(void *data) /* TODO is it safe to call it here or should this call be moved in * front of the clone() as otherwise there might be a risk for a * deadlock */ - if ((ngroups = virGetGroupList(virCommandGetUID(cmd), virCommandGetGID(cmd), - &groups)) < 0) + ngroups = virGetGroupList(virCommandGetUID(cmd), virCommandGetGID(cmd), + &groups); goto cleanup; ret = 0; diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 4e850e219e..669b90125c 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -524,14 +524,9 @@ static int virSecurityDACPreFork(virSecurityManager *mgr) { virSecurityDACData *priv = virSecurityManagerGetPrivateData(mgr); - int ngroups; g_clear_pointer(&priv->groups, g_free); - priv->ngroups = 0; - if ((ngroups = virGetGroupList(priv->user, priv->group, - &priv->groups)) < 0) - return -1; - priv->ngroups = ngroups; + priv->ngroups = virGetGroupList(priv->user, priv->group, &priv->groups); return 0; } diff --git a/src/util/vircommand.c b/src/util/vircommand.c index 81e74deee0..07bee9b12e 100644 --- a/src/util/vircommand.c +++ b/src/util/vircommand.c @@ -735,8 +735,7 @@ virExec(virCommand *cmd) childerr = null; } - if ((ngroups = virGetGroupList(cmd->uid, cmd->gid, &groups)) < 0) - goto cleanup; + ngroups = virGetGroupList(cmd->uid, cmd->gid, &groups); pid = virFork(); diff --git a/src/util/virfile.c b/src/util/virfile.c index f66ecd29a2..c4d22921ce 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -2285,8 +2285,6 @@ virFileAccessibleAs(const char *path, int mode, return access(path, mode); ngroups = virGetGroupList(uid, gid, &groups); - if (ngroups < 0) - return -1; pid = virFork(); @@ -2408,8 +2406,6 @@ virFileOpenForked(const char *path, * NFS servers. */ ngroups = virGetGroupList(uid, gid, &groups); - if (ngroups < 0) - return -errno; if (socketpair(AF_UNIX, SOCK_STREAM, 0, pair) < 0) { ret = -errno; @@ -2709,8 +2705,6 @@ virFileRemove(const char *path, gid = getegid(); ngroups = virGetGroupList(uid, gid, &groups); - if (ngroups < 0) - return -errno; pid = virFork(); @@ -2883,8 +2877,6 @@ virDirCreate(const char *path, gid = getegid(); ngroups = virGetGroupList(uid, gid, &groups); - if (ngroups < 0) - return -errno; pid = virFork(); diff --git a/src/util/virutil.c b/src/util/virutil.c index bd3bbe3f0d..dc5009f11d 100644 --- a/src/util/virutil.c +++ b/src/util/virutil.c @@ -880,14 +880,16 @@ VIR_WARNINGS_NO_POINTER_SIGN * storing a malloc'd result into @list. If uid is -1 or doesn't exist in the * system database querying of the supplementary groups is skipped. * - * Returns the size of the list on success, or -1 on failure with error - * reported and errno set. May not be called between fork and exec. + * Returns the size of the list. Doesn't have an error path. + * May not be called between fork and exec. * */ int -virGetGroupList(uid_t uid, gid_t gid, gid_t **list) +virGetGroupList(uid_t uid, + gid_t gid, + gid_t **list) { int ret = 0; - char *user = NULL; + g_autofree char *user = NULL; gid_t primary; *list = NULL; @@ -925,14 +927,12 @@ virGetGroupList(uid_t uid, gid_t gid, gid_t **list) for (i = 0; i < ret; i++) { if ((*list)[i] == gid) - goto cleanup; + return ret; } VIR_APPEND_ELEMENT(*list, i, gid); - ret = i; + return i; } - cleanup: - VIR_FREE(user); return ret; } diff --git a/tests/commandtest.c b/tests/commandtest.c index aa108ce583..08fbe1801a 100644 --- a/tests/commandtest.c +++ b/tests/commandtest.c @@ -908,9 +908,8 @@ static int test25(const void *unused G_GNUC_UNUSED) goto cleanup; } - if ((ngroups = virGetGroupList(virCommandGetUID(cmd), virCommandGetGID(cmd), - &groups)) < 0) - goto cleanup; + ngroups = virGetGroupList(virCommandGetUID(cmd), virCommandGetGID(cmd), + &groups); /* Now, fork and try to exec a nonexistent binary. */ pid = virFork(); diff --git a/tools/virt-login-shell-helper.c b/tools/virt-login-shell-helper.c index cb94c49720..1627ba85e5 100644 --- a/tools/virt-login-shell-helper.c +++ b/tools/virt-login-shell-helper.c @@ -260,8 +260,7 @@ main(int argc, char **argv) if (!(conf = virConfReadFile(login_shell_path, 0))) goto cleanup; - if ((ngroups = virGetGroupList(uid, gid, &groups)) < 0) - goto cleanup; + ngroups = virGetGroupList(uid, gid, &groups); if (virLoginShellAllowedUser(conf, name, groups, ngroups) < 0) goto cleanup; -- 2.45.1

On Wed, May 22, 2024 at 18:00:17 +0200, Peter Krempa wrote:
Use contemporary style for declarations and automatic memory clearing for a helper string.
Since the function can't fail any more, remove any mention of returning errno and remove error checks from all callers.
Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/lxc/lxc_container.c | 4 ++-- src/security/security_dac.c | 7 +------ src/util/vircommand.c | 3 +-- src/util/virfile.c | 8 -------- src/util/virutil.c | 16 ++++++++-------- tests/commandtest.c | 5 ++--- tools/virt-login-shell-helper.c | 3 +-- 7 files changed, 15 insertions(+), 31 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 652697890f..7e460544fb 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -2059,8 +2059,8 @@ static int lxcContainerChild(void *data) /* TODO is it safe to call it here or should this call be moved in * front of the clone() as otherwise there might be a risk for a * deadlock */ - if ((ngroups = virGetGroupList(virCommandGetUID(cmd), virCommandGetGID(cmd), - &groups)) < 0) + ngroups = virGetGroupList(virCommandGetUID(cmd), virCommandGetGID(cmd), + &groups); goto cleanup;
Looks like leftover goto here.
ret = 0;
... Reviewed-by: Jiri Denemark <jdenemar@redhat.com>

Commit 91f4ebbac81bc3829da6d5a71d7520a6fc9e358e (v10.0.0-185-g91f4ebbac8) changed the return value of virSocketSendFD() from 0 to 1 on success. Unfortunately in 'virFileOpenForked' the return value was used to report the error back to the main process from the fork'd child. As process return codes are positive only, the code negates the value of 'ret' and reports it. This resulted in the parent thinking the process exited with failure: # virsh save avocado-vt-vm1 /mnt/save error: Failed to save domain 'avocado-vt-vm1' to /mnt/save error: Error from child process creating '/mnt/save': Unknown error 255 This error reproduces on NFS mounts with 'root_squash' enabled. I've also observed it in one specific migration case when root_squash NFS is used with following error: Failed to open file '/var/lib/libvirt/images/alpine.qcow2': Unknown error 255' To fix the issue the code is refactored so that it doesn't actually touch the 'ret' variable needlessly and assigns to it only on failure cases, which prevents the '1' to be propagated to the parent process as '255' after negating and storing in the process return code. Fixes: 91f4ebbac81bc3829da6d5a71d7520a6fc9e358e Resolves: https://issues.redhat.com/browse/RHEL-36721 Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/util/virfile.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/util/virfile.c b/src/util/virfile.c index c4d22921ce..d820172405 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -2441,8 +2441,7 @@ virFileOpenForked(const char *path, created = true; /* File is successfully open. Set permissions if requested. */ - ret = virFileOpenForceOwnerMode(path, fd, mode, uid, gid, flags); - if (ret < 0) { + if (virFileOpenForceOwnerMode(path, fd, mode, uid, gid, flags) < 0) { ret = -errno; virReportSystemError(errno, _("child process failed to force owner mode file '%1$s'"), @@ -2450,9 +2449,7 @@ virFileOpenForked(const char *path, goto childerror; } - ret = virSocketSendFD(pair[1], fd); - - if (ret < 0) { + if (virSocketSendFD(pair[1], fd) < 0) { ret = -errno; virReportSystemError(errno, "%s", _("child process failed to send fd to parent")); -- 2.45.1

On Wed, May 22, 2024 at 18:00:18 +0200, Peter Krempa wrote:
Commit 91f4ebbac81bc3829da6d5a71d7520a6fc9e358e (v10.0.0-185-g91f4ebbac8) changed the return value of virSocketSendFD() from 0 to 1 on success.
Unfortunately in 'virFileOpenForked' the return value was used to report the error back to the main process from the fork'd child. As process return codes are positive only, the code negates the value of 'ret' and reports it. This resulted in the parent thinking the process exited with failure:
# virsh save avocado-vt-vm1 /mnt/save error: Failed to save domain 'avocado-vt-vm1' to /mnt/save error: Error from child process creating '/mnt/save': Unknown error 255
This error reproduces on NFS mounts with 'root_squash' enabled. I've also observed it in one specific migration case when root_squash NFS is used with following error:
Failed to open file '/var/lib/libvirt/images/alpine.qcow2': Unknown error 255'
To fix the issue the code is refactored so that it doesn't actually touch the 'ret' variable needlessly and assigns to it only on failure cases, which prevents the '1' to be propagated to the parent process as '255' after negating and storing in the process return code.
Fixes: 91f4ebbac81bc3829da6d5a71d7520a6fc9e358e Resolves: https://issues.redhat.com/browse/RHEL-36721 Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- src/util/virfile.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-)
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>

Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- NEWS.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/NEWS.rst b/NEWS.rst index b6985980ba..4a532bb673 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -51,7 +51,14 @@ v10.4.0 (unreleased) $ virsh help list + * qemu: Fix ``virsh save`` and migration when storage in question is root_squashed NFS + Attempting to save a VM to a root_squash NFS mount or migrating with disks + hosted on such mount could, in some scenarios, result in error stating:: + + 'Unknown error 255' + + The bug was introduced in `v10.1.0 (2024-03-01)`_. v10.3.0 (2024-05-02) ==================== -- 2.45.1

On Wed, May 22, 2024 at 18:00:19 +0200, Peter Krempa wrote:
Signed-off-by: Peter Krempa <pkrempa@redhat.com> --- NEWS.rst | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/NEWS.rst b/NEWS.rst index b6985980ba..4a532bb673 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -51,7 +51,14 @@ v10.4.0 (unreleased)
$ virsh help list
+ * qemu: Fix ``virsh save`` and migration when storage in question is root_squashed NFS
+ Attempting to save a VM to a root_squash NFS mount or migrating with disks + hosted on such mount could, in some scenarios, result in error stating:: + + 'Unknown error 255' + + The bug was introduced in `v10.1.0 (2024-03-01)`_.
One more empty line is needed here. Before this patch were three empty lines (too many) and now only one :-)
v10.3.0 (2024-05-02) ====================
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>
participants (2)
-
Jiri Denemark
-
Peter Krempa