[PATCH] libvirt-guests.sh: fix log output with new systemd

Newer systemd is too smart, it detects the PIDs of the gettext calls and due to that a log lists libvirt-guests.sh with many different PIDs. Furthermore it struggles to collect the output in time, so it can be truncated and overall looks like: libvirt-guests.sh[37986]: Running guests on default URI: libvirt-guests.sh[37995]: Running guests on libvirt-guests.sh[37977]: R Gather the gettext result into a local variable and printing the value from libvirt-guests.sh itself fixes both issues. Fixes: https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/1875708 Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com> --- tools/libvirt-guests.sh.in | 74 +++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/tools/libvirt-guests.sh.in b/tools/libvirt-guests.sh.in index 7af24dab3b..324abe3623 100644 --- a/tools/libvirt-guests.sh.in +++ b/tools/libvirt-guests.sh.in @@ -28,6 +28,16 @@ test ! -r "$sysconfdir"/rc.d/init.d/functions || # Make sure this file is recognized as having translations: _("dummy") . "@bindir@"/gettext.sh +# Avoid output being listed under gettext PID and being truncated +unbuffered_gettext () { + msg="$(gettext "$1")" + echo "$msg" +} +unbuffered_eval_gettext () { + msg="$(eval_gettext "$1")" + echo "$msg" +} + export TEXTDOMAIN="@PACKAGE@" TEXTDOMAINDIR="@localedir@" URIS=default @@ -91,7 +101,7 @@ test_connect() if run_virsh "$uri" connect 2>/dev/null; then return 0; else - eval_gettext "Can't connect to \$uri. Skipping." + unbuffered_eval_gettext "Can't connect to \$uri. Skipping." return 1 fi } @@ -160,7 +170,7 @@ start() { [ -f "$LISTFILE" ] || { started; return 0; } if [ "x$ON_BOOT" != xstart ]; then - gettext "libvirt-guests is configured not to start any guests on boot" + unbuffered_gettext "libvirt-guests is configured not to start any guests on boot" echo rm -f "$LISTFILE" started @@ -184,19 +194,19 @@ start() { done set +f if ! "$configured"; then - eval_gettext "Ignoring guests on \$uri URI"; echo + unbuffered_eval_gettext "Ignoring guests on \$uri URI"; echo continue fi test_connect "$uri" || continue - eval_gettext "Resuming guests on \$uri URI..."; echo + unbuffered_eval_gettext "Resuming guests on \$uri URI..."; echo for guest in $list; do local name=$(guest_name "$uri" "$guest") - eval_gettext "Resuming guest \$name: " + unbuffered_eval_gettext "Resuming guest \$name: " if guest_is_on "$uri" "$guest"; then if "$guest_running"; then - gettext "already active"; echo + unbuffered_gettext "already active"; echo else if "$isfirst"; then isfirst=false @@ -205,7 +215,7 @@ start() { fi retval run_virsh "$uri" start $bypass "$name" \ >/dev/null && \ - gettext "done"; echo + unbuffered_gettext "done"; echo if "$sync_time"; then run_virsh "$uri" domtime --sync "$name" >/dev/null fi @@ -226,7 +236,7 @@ suspend_guest() local uri=$1 local guest=$2 local name=$(guest_name "$uri" "$guest") - local label=$(eval_gettext "Suspending \$name: ") + local label=$(unbuffered_eval_gettext "Suspending \$name: ") local bypass= local slept=0 @@ -249,7 +259,7 @@ suspend_guest() fi fi done - retval wait "$virsh_pid" && printf '%s%s\n' "$label" "$(gettext "done")" + retval wait "$virsh_pid" && printf '%s%s\n' "$label" "$(unbuffered_gettext "done")" } # shutdown_guest URI GUEST @@ -265,16 +275,16 @@ shutdown_guest() local format= local slept= - eval_gettext "Starting shutdown on guest: \$name" + unbuffered_eval_gettext "Starting shutdown on guest: \$name" echo retval run_virsh "$uri" shutdown "$guest" >/dev/null || return if [ $timeout -gt 0 ]; then check_timeout=true - format=$(eval_gettext "Waiting for guest %s to shut down, %d seconds left\n") + format=$(unbuffered_eval_gettext "Waiting for guest %s to shut down, %d seconds left\n") else slept=0 - format=$(eval_gettext "Waiting for guest %s to shut down\n") + format=$(unbuffered_eval_gettext "Waiting for guest %s to shut down\n") fi while ! $check_timeout || [ "$timeout" -gt 0 ]; do sleep 1 @@ -296,9 +306,9 @@ shutdown_guest() if guest_is_on "$uri" "$guest"; then if "$guest_running"; then - eval_gettext "Shutdown of guest \$name failed to complete in time." + unbuffered_eval_gettext "Shutdown of guest \$name failed to complete in time." else - eval_gettext "Shutdown of guest \$name complete." + unbuffered_eval_gettext "Shutdown of guest \$name complete." fi echo fi @@ -313,7 +323,7 @@ shutdown_guest_async() local guest=$2 local name=$(guest_name "$uri" "$guest") - eval_gettext "Starting shutdown on guest: \$name" + unbuffered_eval_gettext "Starting shutdown on guest: \$name" echo retval run_virsh "$uri" shutdown "$guest" > /dev/null } @@ -339,7 +349,7 @@ check_guests_shutdown() guests_shutting_down= for guest in $guests_to_check; do if ! guest_is_on "$uri" "$guest" >/dev/null 2>&1; then - eval_gettext "Failed to determine state of guest: \$guest. Not tracking it anymore." + unbuffered_eval_gettext "Failed to determine state of guest: \$guest. Not tracking it anymore." echo continue fi @@ -366,7 +376,7 @@ print_guests_shutdown() local name=$(guest_name "$uri" "$guest") if [ -n "$name" ]; then - eval_gettext "Shutdown of guest \$name complete." + unbuffered_eval_gettext "Shutdown of guest \$name complete." echo fi done @@ -386,10 +396,10 @@ shutdown_guests_parallel() if [ $timeout -gt 0 ]; then check_timeout=true - format=$(eval_gettext "Waiting for %d guests to shut down, %d seconds left\n") + format=$(unbuffered_eval_gettext "Waiting for %d guests to shut down, %d seconds left\n") else slept=0 - format=$(eval_gettext "Waiting for %d guests to shut down\n") + format=$(unbuffered_eval_gettext "Waiting for %d guests to shut down\n") fi while [ -n "$on_shutdown" ] || [ -n "$guests" ]; do while [ -n "$guests" ] && @@ -417,7 +427,7 @@ shutdown_guests_parallel() fi timeout=$(($timeout - 1)) if [ $timeout -le 0 ]; then - eval_gettext "Timeout expired while shutting down domains"; echo + unbuffered_eval_gettext "Timeout expired while shutting down domains"; echo RETVAL=1 return fi @@ -447,7 +457,7 @@ stop() { if [ "/x$ON_SHUTDOWN" = xshutdown ]; then suspending=false if [ $SHUTDOWN_TIMEOUT -lt 0 ]; then - gettext "SHUTDOWN_TIMEOUT must be equal or greater than 0" + unbuffered_gettext "SHUTDOWN_TIMEOUT must be equal or greater than 0" echo RETVAL=6 return @@ -461,7 +471,7 @@ stop() { test_connect "$uri" || continue - eval_gettext "Running guests on \$uri URI: " + unbuffered_eval_gettext "Running guests on \$uri URI: " local list=$(list_guests "$uri") if [ $? -eq 0 ]; then @@ -473,7 +483,7 @@ stop() { done if "$empty"; then - gettext "no running guests." + unbuffered_gettext "no running guests." fi echo fi @@ -486,7 +496,7 @@ stop() { for uuid in $transient; do if "$empty"; then - eval_gettext "Not suspending transient guests on URI: \$uri: " + unbuffered_eval_gettext "Not suspending transient guests on URI: \$uri: " empty=false else printf ", " @@ -497,14 +507,14 @@ stop() { # reload domain list to contain only persistent guests list=$(list_guests "$uri" "--persistent") if [ $? -ne 0 ]; then - eval_gettext "Failed to list persistent guests on \$uri" + unbuffered_eval_gettext "Failed to list persistent guests on \$uri" echo RETVAL=1 set +f return fi else - gettext "Failed to list transient guests" + unbuffered_gettext "Failed to list transient guests" echo RETVAL=1 set +f @@ -521,9 +531,9 @@ stop() { if [ -s "$LISTFILE" ]; then while read uri list; do if "$suspending"; then - eval_gettext "Suspending guests on \$uri URI..."; echo + unbuffered_eval_gettext "Suspending guests on \$uri URI..."; echo else - eval_gettext "Shutting down guests on \$uri URI..."; echo + unbuffered_eval_gettext "Shutting down guests on \$uri URI..."; echo fi if [ "$PARALLEL_SHUTDOWN" -gt 1 ] && @@ -566,14 +576,14 @@ gueststatus() { # since there is no external daemon process matching this init script. rh_status() { if [ -f "$LISTFILE" ]; then - gettext "stopped, with saved guests"; echo + unbuffered_gettext "stopped, with saved guests"; echo RETVAL=3 else if [ -f "$VAR_SUBSYS_LIBVIRT_GUESTS" ]; then - gettext "started"; echo + unbuffered_gettext "started"; echo RETVAL=0 else - gettext "stopped, with no saved guests"; echo + unbuffered_gettext "stopped, with no saved guests"; echo RETVAL=3 fi fi @@ -583,7 +593,7 @@ rh_status() { # Display usage string, then exit with VAL (defaults to 2). usage() { local program_name=$0 - eval_gettext "Usage: \$program_name {start|stop|status|restart|"\ + unbuffered_eval_gettext "Usage: \$program_name {start|stop|status|restart|"\ "condrestart|try-restart|reload|force-reload|gueststatus|shutdown}"; echo exit ${1-2} } -- 2.26.0

On 4/30/20 11:59 AM, Christian Ehrhardt wrote:
Newer systemd is too smart, it detects the PIDs of the gettext calls and due to that a log lists libvirt-guests.sh with many different PIDs.
Furthermore it struggles to collect the output in time, so it can be truncated and overall looks like: libvirt-guests.sh[37986]: Running guests on default URI: libvirt-guests.sh[37995]: Running guests on libvirt-guests.sh[37977]: R
Gather the gettext result into a local variable and printing the value from libvirt-guests.sh itself fixes both issues.
Fixes: https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/1875708
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com> --- tools/libvirt-guests.sh.in | 74 +++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 32 deletions(-)
diff --git a/tools/libvirt-guests.sh.in b/tools/libvirt-guests.sh.in index 7af24dab3b..324abe3623 100644 --- a/tools/libvirt-guests.sh.in +++ b/tools/libvirt-guests.sh.in @@ -28,6 +28,16 @@ test ! -r "$sysconfdir"/rc.d/init.d/functions || # Make sure this file is recognized as having translations: _("dummy") . "@bindir@"/gettext.sh
+# Avoid output being listed under gettext PID and being truncated +unbuffered_gettext () { + msg="$(gettext "$1")" + echo "$msg" +} +unbuffered_eval_gettext () { + msg="$(eval_gettext "$1")" + echo "$msg" +} +
Ah, is this the case where gettext prints the text in many sequences whereas echo does it all at once? Or what do you think is happening here? I don't quite understand how buffering steps into this. Michal

On Thu, Apr 30, 2020 at 2:10 PM Michal Privoznik <mprivozn@redhat.com> wrote:
On 4/30/20 11:59 AM, Christian Ehrhardt wrote:
Newer systemd is too smart, it detects the PIDs of the gettext calls and due to that a log lists libvirt-guests.sh with many different PIDs.
Furthermore it struggles to collect the output in time, so it can be truncated and overall looks like: libvirt-guests.sh[37986]: Running guests on default URI: libvirt-guests.sh[37995]: Running guests on libvirt-guests.sh[37977]: R
Gather the gettext result into a local variable and printing the value from libvirt-guests.sh itself fixes both issues.
Fixes: https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/1875708
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com> --- tools/libvirt-guests.sh.in | 74 +++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 32 deletions(-)
diff --git a/tools/libvirt-guests.sh.in b/tools/libvirt-guests.sh.in index 7af24dab3b..324abe3623 100644 --- a/tools/libvirt-guests.sh.in +++ b/tools/libvirt-guests.sh.in @@ -28,6 +28,16 @@ test ! -r "$sysconfdir"/rc.d/init.d/functions || # Make sure this file is recognized as having translations: _("dummy") . "@bindir@"/gettext.sh
+# Avoid output being listed under gettext PID and being truncated +unbuffered_gettext () { + msg="$(gettext "$1")" + echo "$msg" +} +unbuffered_eval_gettext () { + msg="$(eval_gettext "$1")" + echo "$msg" +} +
Ah, is this the case where gettext prints the text in many sequences whereas echo does it all at once? Or what do you think is happening here? I don't quite understand how buffering steps into this.
You have two effects: - systemd tries to detect and report which PID the output is from when something it printed. Therefore it appears to a user as if the script itself would have been invoked multiple times while actually is will be the PIDs of the different gettext calls - I assume that to get this content it is hitchhiking the output streams somehow - the gettext processes are very very short lived and it seems when they are exiting some output of them might be lost. Maybe calling it "buffering" was wrong, but the change above resolves both issues, the odd PIDs and the truncation by printing from the actual script.
Michal
-- Christian Ehrhardt Staff Engineer, Ubuntu Server Canonical Ltd

On Thu, Apr 30, 2020 at 02:42:21PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:10 PM Michal Privoznik <mprivozn@redhat.com> wrote:
On 4/30/20 11:59 AM, Christian Ehrhardt wrote:
Newer systemd is too smart, it detects the PIDs of the gettext calls and due to that a log lists libvirt-guests.sh with many different PIDs.
Furthermore it struggles to collect the output in time, so it can be truncated and overall looks like: libvirt-guests.sh[37986]: Running guests on default URI: libvirt-guests.sh[37995]: Running guests on libvirt-guests.sh[37977]: R
Gather the gettext result into a local variable and printing the value from libvirt-guests.sh itself fixes both issues.
Fixes: https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/1875708
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com> --- tools/libvirt-guests.sh.in | 74 +++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 32 deletions(-)
diff --git a/tools/libvirt-guests.sh.in b/tools/libvirt-guests.sh.in index 7af24dab3b..324abe3623 100644 --- a/tools/libvirt-guests.sh.in +++ b/tools/libvirt-guests.sh.in @@ -28,6 +28,16 @@ test ! -r "$sysconfdir"/rc.d/init.d/functions || # Make sure this file is recognized as having translations: _("dummy") . "@bindir@"/gettext.sh
+# Avoid output being listed under gettext PID and being truncated +unbuffered_gettext () { + msg="$(gettext "$1")" + echo "$msg" +} +unbuffered_eval_gettext () { + msg="$(eval_gettext "$1")" + echo "$msg" +} +
Ah, is this the case where gettext prints the text in many sequences whereas echo does it all at once? Or what do you think is happening here? I don't quite understand how buffering steps into this.
You have two effects: - systemd tries to detect and report which PID the output is from when something it printed. Therefore it appears to a user as if the script itself would have been invoked multiple times while actually is will be the PIDs of the different gettext calls - I assume that to get this content it is hitchhiking the output streams somehow - the gettext processes are very very short lived and it seems when they are exiting some output of them might be lost.
This doesn't pass a sniff test to me. systemd is reading stdout/stderr of the libvirt-guests script. How would it ever tell that certain lines from stdout/err are coming from different PIDs. Systemd isn't involved in spawning the gettext processes. It can see they come & go via cgroups pids, but that's about it. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On Thu, Apr 30, 2020 at 2:51 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 02:42:21PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:10 PM Michal Privoznik <mprivozn@redhat.com> wrote:
On 4/30/20 11:59 AM, Christian Ehrhardt wrote:
Newer systemd is too smart, it detects the PIDs of the gettext calls and due to that a log lists libvirt-guests.sh with many different PIDs.
Furthermore it struggles to collect the output in time, so it can be truncated and overall looks like: libvirt-guests.sh[37986]: Running guests on default URI: libvirt-guests.sh[37995]: Running guests on libvirt-guests.sh[37977]: R
Gather the gettext result into a local variable and printing the value from libvirt-guests.sh itself fixes both issues.
Fixes: https://bugs.launchpad.net/ubuntu/+source/libvirt/+bug/1875708
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com> --- tools/libvirt-guests.sh.in | 74 +++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 32 deletions(-)
diff --git a/tools/libvirt-guests.sh.in b/tools/libvirt-guests.sh.in index 7af24dab3b..324abe3623 100644 --- a/tools/libvirt-guests.sh.in +++ b/tools/libvirt-guests.sh.in @@ -28,6 +28,16 @@ test ! -r "$sysconfdir"/rc.d/init.d/functions || # Make sure this file is recognized as having translations: _("dummy") . "@bindir@"/gettext.sh
+# Avoid output being listed under gettext PID and being truncated +unbuffered_gettext () { + msg="$(gettext "$1")" + echo "$msg" +} +unbuffered_eval_gettext () { + msg="$(eval_gettext "$1")" + echo "$msg" +} +
Ah, is this the case where gettext prints the text in many sequences whereas echo does it all at once? Or what do you think is happening here? I don't quite understand how buffering steps into this.
You have two effects: - systemd tries to detect and report which PID the output is from when something it printed. Therefore it appears to a user as if the script itself would have been invoked multiple times while actually is will be the PIDs of the different gettext calls - I assume that to get this content it is hitchhiking the output streams somehow - the gettext processes are very very short lived and it seems when they are exiting some output of them might be lost.
This doesn't pass a sniff test to me.
systemd is reading stdout/stderr of the libvirt-guests script. How would it ever tell that certain lines from stdout/err are coming from different PIDs. Systemd isn't involved in spawning the gettext processes. It can see they come & go via cgroups pids, but that's about it.
Well it seems I have a reproducible symptom and a fix, but not the explanation why the latter fixes the former. I'll need to dive into some debug & analysis myself to explain it better. I'll be back here once I got time to do that in depth check. Until then whoever is affected (should be everyone) can give it a thought as well. Repro is as easy as One console: $ journalctl -f -u libvirt-guests Other console: $ systemctl stop libvirt-guests $ systemctl start libvirt-guests I see it with 245.4-4ubuntu3 (18.04) I'm not seeing it on 237-3ubuntu10.39 (20.04). Maybe it is a systemd bug after all? I'd be interested to hear if that is/isn't clobbering output for anyone else and what your systemd versions are?
Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
-- Christian Ehrhardt Staff Engineer, Ubuntu Server Canonical Ltd

On Thu, Apr 30, 2020 at 04:58:25PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:51 PM Daniel P. Berrangé <berrange@redhat.com> wrote: Well it seems I have a reproducible symptom and a fix, but not the explanation why the latter fixes the former. I'll need to dive into some debug & analysis myself to explain it better. I'll be back here once I got time to do that in depth check.
Until then whoever is affected (should be everyone) can give it a thought as well. Repro is as easy as One console: $ journalctl -f -u libvirt-guests Other console: $ systemctl stop libvirt-guests $ systemctl start libvirt-guests
I see it with 245.4-4ubuntu3 (18.04) I'm not seeing it on 237-3ubuntu10.39 (20.04). Maybe it is a systemd bug after all? I'd be interested to hear if that is/isn't clobbering output for anyone else and what your systemd versions are?
FWIW, it works correctly on Fedora 31 with systemd 243. Apr 30 16:07:27 localhost.localdomain systemd[1]: Starting Suspend/Resume Running libvirt Guests... Apr 30 16:07:27 localhost.localdomain systemd[1]: Started Suspend/Resume Running libvirt Guests. Apr 30 16:08:33 localhost.localdomain systemd[1]: Stopping Suspend/Resume Running libvirt Guests... Apr 30 16:08:33 localhost.localdomain libvirt-guests.sh[2192485]: Running guests on default URI: memtest, memtest1 Apr 30 16:08:33 localhost.localdomain libvirt-guests.sh[2192485]: Suspending guests on default URI... Apr 30 16:08:33 localhost.localdomain libvirt-guests.sh[2192485]: Suspending memtest: ... Apr 30 16:08:34 localhost.localdomain libvirt-guests.sh[2192485]: Suspending memtest: done Apr 30 16:08:34 localhost.localdomain libvirt-guests.sh[2192485]: Suspending memtest1: ... Apr 30 16:08:35 localhost.localdomain libvirt-guests.sh[2192485]: Suspending memtest1: done Apr 30 16:08:35 localhost.localdomain systemd[1]: libvirt-guests.service: Succeeded. Apr 30 16:08:35 localhost.localdomain systemd[1]: Stopped Suspend/Resume Running libvirt Guests. I'll see if I can do a test with newer systemd too Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On Thu, Apr 30, 2020 at 5:10 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 04:58:25PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:51 PM Daniel P. Berrangé <berrange@redhat.com> wrote: Well it seems I have a reproducible symptom and a fix, but not the explanation why the latter fixes the former. I'll need to dive into some debug & analysis myself to explain it better. I'll be back here once I got time to do that in depth check.
Until then whoever is affected (should be everyone) can give it a thought as well. Repro is as easy as One console: $ journalctl -f -u libvirt-guests Other console: $ systemctl stop libvirt-guests $ systemctl start libvirt-guests
I see it with 245.4-4ubuntu3 (18.04) I'm not seeing it on 237-3ubuntu10.39 (20.04). Maybe it is a systemd bug after all? I'd be interested to hear if that is/isn't clobbering output for anyone else and what your systemd versions are?
FWIW, it works correctly on Fedora 31 with systemd 243.
Eoan with 242-7ubuntu3.7 is good as well. I might need to try to get some interim versions from somewhere.
Apr 30 16:07:27 localhost.localdomain systemd[1]: Starting Suspend/Resume Running libvirt Guests... Apr 30 16:07:27 localhost.localdomain systemd[1]: Started Suspend/Resume Running libvirt Guests. Apr 30 16:08:33 localhost.localdomain systemd[1]: Stopping Suspend/Resume Running libvirt Guests... Apr 30 16:08:33 localhost.localdomain libvirt-guests.sh[2192485]: Running guests on default URI: memtest, memtest1 Apr 30 16:08:33 localhost.localdomain libvirt-guests.sh[2192485]: Suspending guests on default URI... Apr 30 16:08:33 localhost.localdomain libvirt-guests.sh[2192485]: Suspending memtest: ... Apr 30 16:08:34 localhost.localdomain libvirt-guests.sh[2192485]: Suspending memtest: done Apr 30 16:08:34 localhost.localdomain libvirt-guests.sh[2192485]: Suspending memtest1: ... Apr 30 16:08:35 localhost.localdomain libvirt-guests.sh[2192485]: Suspending memtest1: done Apr 30 16:08:35 localhost.localdomain systemd[1]: libvirt-guests.service: Succeeded. Apr 30 16:08:35 localhost.localdomain systemd[1]: Stopped Suspend/Resume Running libvirt Guests.
I'll see if I can do a test with newer systemd too
Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
-- Christian Ehrhardt Staff Engineer, Ubuntu Server Canonical Ltd

On Thu, Apr 30, 2020 at 06:28:08PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 5:10 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 04:58:25PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:51 PM Daniel P. Berrangé <berrange@redhat.com> wrote: Well it seems I have a reproducible symptom and a fix, but not the explanation why the latter fixes the former. I'll need to dive into some debug & analysis myself to explain it better. I'll be back here once I got time to do that in depth check.
Until then whoever is affected (should be everyone) can give it a thought as well. Repro is as easy as One console: $ journalctl -f -u libvirt-guests Other console: $ systemctl stop libvirt-guests $ systemctl start libvirt-guests
I see it with 245.4-4ubuntu3 (18.04) I'm not seeing it on 237-3ubuntu10.39 (20.04). Maybe it is a systemd bug after all? I'd be interested to hear if that is/isn't clobbering output for anyone else and what your systemd versions are?
FWIW, it works correctly on Fedora 31 with systemd 243.
Eoan with 242-7ubuntu3.7 is good as well. I might need to try to get some interim versions from somewhere.
I've reproduced on Fedora 33 rawhide with systemd 245 - the first place where it lists running guests is screwed up slightly: Apr 30 17:16:13 libvirt-fedora-rawhide systemd[1]: Stopping Suspend/Resume Running libvirt Guests... Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69903]: Running guests on default URI: Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Runningcore2 Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69934]: Suspending guests on default URI... Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69892]: SSuspending core1: ... Apr 30 17:16:14 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core1: done Apr 30 17:16:14 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core2: ... Apr 30 17:16:15 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core2: done Apr 30 17:16:15 libvirt-fedora-rawhide systemd[1]: libvirt-guests.service: Succeeded. Apr 30 17:16:15 libvirt-fedora-rawhide systemd[1]: Stopped Suspend/Resume Running libvirt Guests. On resume it is even worse Apr 30 17:19:40 libvirt-fedora-rawhide systemd[1]: Starting Suspend/Resume Running libvirt Guests... Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70041]: Resuming guests on default URI... Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70030]: R Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70048]: R Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70048]: esuming guest core1: Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70079]: esum Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70030]: e Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70086]: e Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70086]: esuming guest core2: Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70119]: esum Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70030]: e Apr 30 17:19:42 libvirt-fedora-rawhide systemd[1]: Finished Suspend/Resume Running libvirt Guests. It is possible it isn't systemd related - could be other packages that are co-incidentally affecting it. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On Thu, Apr 30, 2020 at 7:23 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 06:28:08PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 5:10 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 04:58:25PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:51 PM Daniel P. Berrangé <berrange@redhat.com> wrote: Well it seems I have a reproducible symptom and a fix, but not the explanation why the latter fixes the former. I'll need to dive into some debug & analysis myself to explain it better. I'll be back here once I got time to do that in depth check.
Until then whoever is affected (should be everyone) can give it a thought as well. Repro is as easy as One console: $ journalctl -f -u libvirt-guests Other console: $ systemctl stop libvirt-guests $ systemctl start libvirt-guests
I see it with 245.4-4ubuntu3 (18.04) I'm not seeing it on 237-3ubuntu10.39 (20.04). Maybe it is a systemd bug after all? I'd be interested to hear if that is/isn't clobbering output for anyone else and what your systemd versions are?
FWIW, it works correctly on Fedora 31 with systemd 243.
Eoan with 242-7ubuntu3.7 is good as well. I might need to try to get some interim versions from somewhere.
I've reproduced on Fedora 33 rawhide with systemd 245 - the first place where it lists running guests is screwed up slightly:
Apr 30 17:16:13 libvirt-fedora-rawhide systemd[1]: Stopping Suspend/Resume Running libvirt Guests... Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69903]: Running guests on default URI: Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Runningcore2 Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69934]: Suspending guests on default URI... Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69892]: SSuspending core1: ... Apr 30 17:16:14 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core1: done Apr 30 17:16:14 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core2: ... Apr 30 17:16:15 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core2: done Apr 30 17:16:15 libvirt-fedora-rawhide systemd[1]: libvirt-guests.service: Succeeded. Apr 30 17:16:15 libvirt-fedora-rawhide systemd[1]: Stopped Suspend/Resume Running libvirt Guests.
On resume it is even worse
Apr 30 17:19:40 libvirt-fedora-rawhide systemd[1]: Starting Suspend/Resume Running libvirt Guests... Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70041]: Resuming guests on default URI... Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70030]: R Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70048]: R Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70048]: esuming guest core1: Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70079]: esum Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70030]: e Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70086]: e Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70086]: esuming guest core2: Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70119]: esum Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70030]: e Apr 30 17:19:42 libvirt-fedora-rawhide systemd[1]: Finished Suspend/Resume Running libvirt Guests.
It is possible it isn't systemd related - could be other packages that are co-incidentally affecting it.
Since launchpad holds all former builds still ready for download I could easily test a few versions. I up/downgraded just the following packages on an otherwise unmodified system: - libnss-systemd - libpam-systemd - libsystemd0 - systemd - systemd-container - systemd-sysv Those versions I did check: 245.4-4ubuntu3 bad 245.2-1ubuntu1 bad 244.3-1ubuntu1 bad 244.1-0ubuntu3 bad (bad on retry) 243-3ubuntu1 good (good (on retry) 242-7ubuntu3 good Retry means that I went and installed from good -> bad -> good and the behavior was the same. So none of the installs fixed it to then be good forever. It was consistently good <244 and bad >=244. Since I only installed the mentioned systemd packages and no others I'd say it is systemd. I can't break this up into which package, since they inter-depend each other. I guess it might be time to file a systemd bug for this - if not to fix then to understand what is going on so that we can make a better decision.
Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
-- Christian Ehrhardt Staff Engineer, Ubuntu Server Canonical Ltd

On Thu, Apr 30, 2020 at 07:36:54PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 7:23 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 06:28:08PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 5:10 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 04:58:25PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:51 PM Daniel P. Berrangé <berrange@redhat.com> wrote: Well it seems I have a reproducible symptom and a fix, but not the explanation why the latter fixes the former. I'll need to dive into some debug & analysis myself to explain it better. I'll be back here once I got time to do that in depth check.
Until then whoever is affected (should be everyone) can give it a thought as well. Repro is as easy as One console: $ journalctl -f -u libvirt-guests Other console: $ systemctl stop libvirt-guests $ systemctl start libvirt-guests
I see it with 245.4-4ubuntu3 (18.04) I'm not seeing it on 237-3ubuntu10.39 (20.04). Maybe it is a systemd bug after all? I'd be interested to hear if that is/isn't clobbering output for anyone else and what your systemd versions are?
FWIW, it works correctly on Fedora 31 with systemd 243.
Eoan with 242-7ubuntu3.7 is good as well. I might need to try to get some interim versions from somewhere.
I've reproduced on Fedora 33 rawhide with systemd 245 - the first place where it lists running guests is screwed up slightly:
Apr 30 17:16:13 libvirt-fedora-rawhide systemd[1]: Stopping Suspend/Resume Running libvirt Guests... Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69903]: Running guests on default URI: Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Runningcore2 Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69934]: Suspending guests on default URI... Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69892]: SSuspending core1: ... Apr 30 17:16:14 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core1: done Apr 30 17:16:14 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core2: ... Apr 30 17:16:15 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core2: done Apr 30 17:16:15 libvirt-fedora-rawhide systemd[1]: libvirt-guests.service: Succeeded. Apr 30 17:16:15 libvirt-fedora-rawhide systemd[1]: Stopped Suspend/Resume Running libvirt Guests.
On resume it is even worse
Apr 30 17:19:40 libvirt-fedora-rawhide systemd[1]: Starting Suspend/Resume Running libvirt Guests... Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70041]: Resuming guests on default URI... Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70030]: R Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70048]: R Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70048]: esuming guest core1: Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70079]: esum Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70030]: e Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70086]: e Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70086]: esuming guest core2: Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70119]: esum Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70030]: e Apr 30 17:19:42 libvirt-fedora-rawhide systemd[1]: Finished Suspend/Resume Running libvirt Guests.
It is possible it isn't systemd related - could be other packages that are co-incidentally affecting it.
Since launchpad holds all former builds still ready for download I could easily test a few versions.
I up/downgraded just the following packages on an otherwise unmodified system: - libnss-systemd - libpam-systemd - libsystemd0 - systemd - systemd-container - systemd-sysv
Those versions I did check:
245.4-4ubuntu3 bad 245.2-1ubuntu1 bad 244.3-1ubuntu1 bad 244.1-0ubuntu3 bad (bad on retry) 243-3ubuntu1 good (good (on retry) 242-7ubuntu3 good
Retry means that I went and installed from good -> bad -> good and the behavior was the same. So none of the installs fixed it to then be good forever. It was consistently good <244 and bad >=244.
Based on that, a possibly interesting change between v243..v244 was this: commit 09d0b46ab61bebafe5bdc1be95ee153dfb13d6bc Author: Lorenz Bauer <lmb@cloudflare.com> Date: Mon Nov 4 16:35:46 2019 +0000 journal: refresh cached credentials of stdout streams journald assumes that getsockopt(SO_PEERCRED) correctly identifies the process on the remote end of the socket. However, this is incorrect according to man 7 socket: The returned credentials are those that were in effect at the time of the call to connect(2) or socketpair(2). This becomes a problem when a new process inherits the stdout stream from a parent. First, log messages from the child process will be attributed to the parent. Second, the struct ucred used by journald becomes invalid as soon as the parent exits. Further sendmsg calls then fail with ENOENT. Logs for the child process then vanish from the journal. Fix this by using recvmsg on the stdout stream, and refreshing the cached struct ucred if SCM_CREDENTIALS indicate a new process. but I can't see an obvious mistake the code which would cause the behaviour we see. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On Thu, Apr 30, 2020 at 7:59 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 07:36:54PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 7:23 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 06:28:08PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 5:10 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 04:58:25PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:51 PM Daniel P. Berrangé <berrange@redhat.com> wrote: Well it seems I have a reproducible symptom and a fix, but not the explanation why the latter fixes the former. I'll need to dive into some debug & analysis myself to explain it better. I'll be back here once I got time to do that in depth check.
Until then whoever is affected (should be everyone) can give it a thought as well. Repro is as easy as One console: $ journalctl -f -u libvirt-guests Other console: $ systemctl stop libvirt-guests $ systemctl start libvirt-guests
I see it with 245.4-4ubuntu3 (18.04) I'm not seeing it on 237-3ubuntu10.39 (20.04). Maybe it is a systemd bug after all? I'd be interested to hear if that is/isn't clobbering output for anyone else and what your systemd versions are?
FWIW, it works correctly on Fedora 31 with systemd 243.
Eoan with 242-7ubuntu3.7 is good as well. I might need to try to get some interim versions from somewhere.
I've reproduced on Fedora 33 rawhide with systemd 245 - the first place where it lists running guests is screwed up slightly:
Apr 30 17:16:13 libvirt-fedora-rawhide systemd[1]: Stopping Suspend/Resume Running libvirt Guests... Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69903]: Running guests on default URI: Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Runningcore2 Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69934]: Suspending guests on default URI... Apr 30 17:16:13 libvirt-fedora-rawhide libvirt-guests.sh[69892]: SSuspending core1: ... Apr 30 17:16:14 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core1: done Apr 30 17:16:14 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core2: ... Apr 30 17:16:15 libvirt-fedora-rawhide libvirt-guests.sh[69892]: Suspending core2: done Apr 30 17:16:15 libvirt-fedora-rawhide systemd[1]: libvirt-guests.service: Succeeded. Apr 30 17:16:15 libvirt-fedora-rawhide systemd[1]: Stopped Suspend/Resume Running libvirt Guests.
On resume it is even worse
Apr 30 17:19:40 libvirt-fedora-rawhide systemd[1]: Starting Suspend/Resume Running libvirt Guests... Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70041]: Resuming guests on default URI... Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70030]: R Apr 30 17:19:40 libvirt-fedora-rawhide libvirt-guests.sh[70048]: R Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70048]: esuming guest core1: Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70079]: esum Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70030]: e Apr 30 17:19:41 libvirt-fedora-rawhide libvirt-guests.sh[70086]: e Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70086]: esuming guest core2: Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70119]: esum Apr 30 17:19:42 libvirt-fedora-rawhide libvirt-guests.sh[70030]: e Apr 30 17:19:42 libvirt-fedora-rawhide systemd[1]: Finished Suspend/Resume Running libvirt Guests.
It is possible it isn't systemd related - could be other packages that are co-incidentally affecting it.
Since launchpad holds all former builds still ready for download I could easily test a few versions.
I up/downgraded just the following packages on an otherwise unmodified system: - libnss-systemd - libpam-systemd - libsystemd0 - systemd - systemd-container - systemd-sysv
Those versions I did check:
245.4-4ubuntu3 bad 245.2-1ubuntu1 bad 244.3-1ubuntu1 bad 244.1-0ubuntu3 bad (bad on retry) 243-3ubuntu1 good (good (on retry) 242-7ubuntu3 good
Retry means that I went and installed from good -> bad -> good and the behavior was the same. So none of the installs fixed it to then be good forever. It was consistently good <244 and bad >=244.
Based on that, a possibly interesting change between v243..v244 was this:
commit 09d0b46ab61bebafe5bdc1be95ee153dfb13d6bc Author: Lorenz Bauer <lmb@cloudflare.com> Date: Mon Nov 4 16:35:46 2019 +0000
journal: refresh cached credentials of stdout streams
FYI - reported to systemd https://github.com/systemd/systemd/issues/15654
journald assumes that getsockopt(SO_PEERCRED) correctly identifies the process on the remote end of the socket. However, this is incorrect according to man 7 socket:
The returned credentials are those that were in effect at the time of the call to connect(2) or socketpair(2).
This becomes a problem when a new process inherits the stdout stream from a parent. First, log messages from the child process will be attributed to the parent. Second, the struct ucred used by journald becomes invalid as soon as the parent exits. Further sendmsg calls then fail with ENOENT. Logs for the child process then vanish from the journal.
Fix this by using recvmsg on the stdout stream, and refreshing the cached struct ucred if SCM_CREDENTIALS indicate a new process.
but I can't see an obvious mistake the code which would cause the behaviour we see.
Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
-- Christian Ehrhardt Staff Engineer, Ubuntu Server Canonical Ltd

On Thu, Apr 30, 2020 at 06:22:28PM +0100, Daniel P. Berrangé wrote:
On Thu, Apr 30, 2020 at 06:28:08PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 5:10 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
On Thu, Apr 30, 2020 at 04:58:25PM +0200, Christian Ehrhardt wrote:
On Thu, Apr 30, 2020 at 2:51 PM Daniel P. Berrangé <berrange@redhat.com> wrote: Well it seems I have a reproducible symptom and a fix, but not the explanation why the latter fixes the former. I'll need to dive into some debug & analysis myself to explain it better. I'll be back here once I got time to do that in depth check.
Until then whoever is affected (should be everyone) can give it a thought as well. Repro is as easy as One console: $ journalctl -f -u libvirt-guests Other console: $ systemctl stop libvirt-guests $ systemctl start libvirt-guests
I see it with 245.4-4ubuntu3 (18.04) I'm not seeing it on 237-3ubuntu10.39 (20.04). Maybe it is a systemd bug after all? I'd be interested to hear if that is/isn't clobbering output for anyone else and what your systemd versions are?
FWIW, it works correctly on Fedora 31 with systemd 243.
Eoan with 242-7ubuntu3.7 is good as well. I might need to try to get some interim versions from somewhere.
I've reproduced on Fedora 33 rawhide with systemd 245 - the first place where it lists running guests is screwed up slightly:
I strace'd libvirt-guests.sh and the data sent looks fine. I strace'd systemd-journald and the data it receives looks fine, but the data systemd-journald writes into the journal, however, is completely fubar: recvmsg(32, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="Resuming guest core1: ", iov_len=2054}], msg_iovlen=1, msg_control=[{cmsg_len=28, cmsg_level=SOL_SOCKET, cmsg_type=SCM_CREDENTIALS, cmsg_data={pid=70624, uid=0, gid=0}}], msg_controllen=32, msg_flags=MSG_CTRUNC|MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 22 writev(33, [{iov_base="[ 2741.146550] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70605]: ", iov_len=9}, {iov_base="R", iov_len=1}, {iov_base="\n", iov_len=1}], 5) = 43 writev(33, [{iov_base="[ 2741.163787] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70624]: ", iov_len=9}, {iov_base="R", iov_len=1}, {iov_base="\n", iov_len=1}], 5) = 43 writev(33, [{iov_base="[ 2742.518626] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70624]: ", iov_len=9}, {iov_base="esuming guest core1:", iov_len=20}, {iov_base="\n", iov_len=1}], 5) = 62 writev(33, [{iov_base="[ 2742.531644] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70655]: ", iov_len=9}, {iov_base="esum", iov_len=4}, {iov_base="\n", iov_len=1}], 5) = 46 recvmsg(32, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="Resuming guest core2: ", iov_len=2054}], msg_iovlen=1, msg_control=[{cmsg_len=28, cmsg_level=SOL_SOCKET, cmsg_type=SCM_CREDENTIALS, cmsg_data={pid=70662, uid=0, gid=0}}], msg_controllen=32, msg_flags=MSG_CTRUNC|MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 22 writev(33, [{iov_base="[ 2742.687436] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70605]: ", iov_len=9}, {iov_base="e", iov_len=1}, {iov_base="\n", iov_len=1}], 5) = 43 writev(33, [{iov_base="[ 2742.690716] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70662]: ", iov_len=9}, {iov_base="e", iov_len=1}, {iov_base="\n", iov_len=1}], 5) = 43 writev(33, [{iov_base="[ 2744.015012] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70662]: ", iov_len=9}, {iov_base="esuming guest core2:", iov_len=20}, {iov_base="\n", iov_len=1}], 5) = 62 writev(33, [{iov_base="[ 2744.019848] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70694]: ", iov_len=9}, {iov_base="esum", iov_len=4}, {iov_base="\n", iov_len=1}], 5) = 46 writev(33, [{iov_base="[ 2744.036819] ", iov_len=15}, {iov_base="libvirt-guests.sh", iov_len=17}, {iov_base="[70605]: ", iov_len=9}, {iov_base="e", iov_len=1}, {iov_base="\n", iov_len=1}], 5) = 43 So I think we found a regression in journald making it mangle log messages. I smells like it is corrupting some buffers somewhere. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
participants (3)
-
Christian Ehrhardt
-
Daniel P. Berrangé
-
Michal Privoznik