[libvirt] [PATCH 1/3] Allow multiple monitor devices

Right now only one monitor device can be enabled at a time. In order to support asynchronous notification of events, I would like to introduce a 'wait' command that waits for an event to occur. This implies that we need an additional monitor session to allow commands to still be executed while waiting for an asynchronous notification. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> diff --git a/vl.c b/vl.c index 4bd173f..f78cabb 100644 --- a/vl.c +++ b/vl.c @@ -184,6 +184,9 @@ int main(int argc, char **argv) /* Max number of bluetooth switches on the commandline. */ #define MAX_BT_CMDLINE 10 +/* Maximum number of monitor devices */ +#define MAX_MONITOR_DEVICES 10 + /* XXX: use a two level table to limit memory usage */ #define MAX_IOPORTS 65536 @@ -4252,8 +4255,9 @@ int main(int argc, char **argv, char **envp) int hda_index; int optind; const char *r, *optarg; - CharDriverState *monitor_hd = NULL; - const char *monitor_device; + CharDriverState *monitor_hds[MAX_MONITOR_DEVICES]; + const char *monitor_devices[MAX_MONITOR_DEVICES]; + int monitor_device_index; const char *serial_devices[MAX_SERIAL_PORTS]; int serial_device_index; const char *parallel_devices[MAX_PARALLEL_PORTS]; @@ -4324,7 +4328,6 @@ int main(int argc, char **argv, char **envp) kernel_cmdline = ""; cyls = heads = secs = 0; translation = BIOS_ATA_TRANSLATION_AUTO; - monitor_device = "vc:80Cx24C"; serial_devices[0] = "vc:80Cx24C"; for(i = 1; i < MAX_SERIAL_PORTS; i++) @@ -4340,6 +4343,11 @@ int main(int argc, char **argv, char **envp) virtio_consoles[i] = NULL; virtio_console_index = 0; + monitor_devices[0] = "vc:80Cx24C"; + for (i = 1; i < MAX_MONITOR_DEVICES; i++) + monitor_devices[i] = NULL; + monitor_device_index = 0; + usb_devices_index = 0; nb_net_clients = 0; @@ -4723,7 +4731,12 @@ int main(int argc, char **argv, char **envp) break; } case QEMU_OPTION_monitor: - monitor_device = optarg; + if (monitor_device_index >= MAX_MONITOR_DEVICES) { + fprintf(stderr, "qemu: too many monitor 
devices\n"); + exit(1); + } + monitor_devices[monitor_device_index] = optarg; + monitor_device_index++; break; case QEMU_OPTION_serial: if (serial_device_index >= MAX_SERIAL_PORTS) { @@ -4974,8 +4987,8 @@ int main(int argc, char **argv, char **envp) serial_devices[0] = "stdio"; if (parallel_device_index == 0) parallel_devices[0] = "null"; - if (strncmp(monitor_device, "vc", 2) == 0) - monitor_device = "stdio"; + if (strncmp(monitor_devices[0], "vc", 2) == 0) + monitor_devices[0] = "stdio"; } #ifndef _WIN32 @@ -5184,14 +5197,14 @@ int main(int argc, char **argv, char **envp) #endif /* Maintain compatibility with multiple stdio monitors */ - if (!strcmp(monitor_device,"stdio")) { + if (!strcmp(monitor_devices[0],"stdio")) { for (i = 0; i < MAX_SERIAL_PORTS; i++) { const char *devname = serial_devices[i]; if (devname && !strcmp(devname,"mon:stdio")) { - monitor_device = NULL; + monitor_devices[0] = NULL; break; } else if (devname && !strcmp(devname,"stdio")) { - monitor_device = NULL; + monitor_devices[0] = NULL; serial_devices[i] = "mon:stdio"; break; } @@ -5208,11 +5221,20 @@ int main(int argc, char **argv, char **envp) } } - if (monitor_device) { - monitor_hd = qemu_chr_open("monitor", monitor_device, NULL); - if (!monitor_hd) { - fprintf(stderr, "qemu: could not open monitor device '%s'\n", monitor_device); - exit(1); + for (i = 0; i < MAX_MONITOR_DEVICES; i++) { + const char *devname = monitor_devices[i]; + if (devname && strcmp(devname, "none")) { + char label[32]; + if (i == 0) + snprintf(label, sizeof(label), "monitor"); + else + snprintf(label, sizeof(label), "monitor%d", i); + monitor_hds[i] = qemu_chr_open(label, devname, NULL); + if (!monitor_hds[i]) { + fprintf(stderr, "qemu: could not open monitor device '%s'\n", + devname); + exit(1); + } } } @@ -5335,8 +5357,13 @@ int main(int argc, char **argv, char **envp) text_consoles_set_display(display_state); qemu_chr_initial_reset(); - if (monitor_device && monitor_hd) - monitor_init(monitor_hd, 
MONITOR_USE_READLINE | MONITOR_IS_DEFAULT); + for (i = 0; i < MAX_MONITOR_DEVICES; i++) { + if (monitor_devices[i] && monitor_hds[i]) { + monitor_init(monitor_hds[i], + MONITOR_USE_READLINE | + ((i == 0) ? MONITOR_IS_DEFAULT : 0)); + } + } for(i = 0; i < MAX_SERIAL_PORTS; i++) { const char *devname = serial_devices[i];

The wait command will pause the monitor the command was issued in until a new event becomes available. Events are queued if there isn't a waiter present. The wait command completes after a single event is available. Today, we queue events indefinitely but in the future, I suspect we'll drop events that are older than a certain amount of time to avoid infinitely allocating memory for long running VMs. To make use of the new notification mechanism, this patch introduces a qemu_notify_event() API. This API takes three parameters: a class which is meant to classify the type of event being generated, a name which is meant to distinguish which event in the class that has been generated, and a details parameters which is meant to allow events to send arbitrary data with a given event. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> diff --git a/Makefile b/Makefile index 633774e..b87870b 100644 --- a/Makefile +++ b/Makefile @@ -84,7 +84,7 @@ OBJS+=usb-serial.o usb-net.o OBJS+=sd.o ssi-sd.o OBJS+=bt.o bt-host.o bt-vhci.o bt-l2cap.o bt-sdp.o bt-hci.o bt-hid.o usb-bt.o OBJS+=buffered_file.o migration.o migration-tcp.o net.o qemu-sockets.o -OBJS+=qemu-char.o aio.o net-checksum.o savevm.o cache-utils.o +OBJS+=qemu-char.o aio.o net-checksum.o savevm.o cache-utils.o wait.o ifdef CONFIG_BRLAPI OBJS+= baum.o diff --git a/monitor.c b/monitor.c index ca1c11c..5245f7c 100644 --- a/monitor.c +++ b/monitor.c @@ -42,6 +42,7 @@ #include "migration.h" #include "kvm.h" #include "acl.h" +#include "wait.h" //#define DEBUG //#define DEBUG_COMPLETION @@ -1745,6 +1746,7 @@ static const mon_cmd_t mon_cmds[] = { "acl allow vnc.username fred\n" "acl deny vnc.username bob\n" "acl reset vnc.username\n" }, + { "wait", "", do_wait, "", "wait for an asynchronous event to occur" }, { NULL, NULL, }, }; diff --git a/wait.c b/wait.c new file mode 100644 index 0000000..092433a --- /dev/null +++ b/wait.c @@ -0,0 +1,97 @@ +/* + * Asynchronous monitor notification support + * + * Copyright IBM, Corp. 
2009 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "monitor.h" +#include "sys-queue.h" +#include "osdep.h" +#include "wait.h" + +typedef struct WaitEvent +{ + qemu_timeval timestamp; + char *class; + char *name; + char *details; + TAILQ_ENTRY(WaitEvent) node; +} WaitEvent; + +typedef struct PendingWaiter +{ + Monitor *mon; + TAILQ_ENTRY(PendingWaiter) node; +} PendingWaiter; + +static TAILQ_HEAD(, WaitEvent) pending_events = TAILQ_HEAD_INITIALIZER(pending_events); +static TAILQ_HEAD(, PendingWaiter) pending_waiters = TAILQ_HEAD_INITIALIZER(pending_waiters); + +static void dispatch_event(PendingWaiter *w, WaitEvent *e) +{ + monitor_printf(w->mon, "%ld.%06ld: %s: %s\n%s", + e->timestamp.tv_sec, e->timestamp.tv_usec, + e->class, e->name, + e->details); + monitor_resume(w->mon); +} + +static void try_to_process_events(void) +{ + while (!TAILQ_EMPTY(&pending_events) && !TAILQ_EMPTY(&pending_waiters)) { + WaitEvent *e; + PendingWaiter *w; + + e = TAILQ_FIRST(&pending_events); + TAILQ_REMOVE(&pending_events, e, node); + + w = TAILQ_FIRST(&pending_waiters); + TAILQ_REMOVE(&pending_waiters, w, node); + + dispatch_event(w, e); + + qemu_free(e->details); + qemu_free(e->name); + qemu_free(e->class); + + qemu_free(w); + } +} + +void qemu_notify_event(const char *class, const char *name, const char *details) +{ + WaitEvent *e; + + e = qemu_mallocz(sizeof(*e)); + + qemu_gettimeofday(&e->timestamp); + e->class = qemu_strdup(class); + e->name = qemu_strdup(name); + e->details = qemu_strdup(details); + + TAILQ_INSERT_TAIL(&pending_events, e, node); + + try_to_process_events(); +} + +void do_wait(Monitor *mon) +{ + PendingWaiter *w; + + w = qemu_mallocz(sizeof(*w)); + w->mon = mon; + + TAILQ_INSERT_TAIL(&pending_waiters, w, node); + + monitor_suspend(w->mon); + + try_to_process_events(); +} diff --git 
a/wait.h b/wait.h new file mode 100644 index 0000000..51b6467 --- /dev/null +++ b/wait.h @@ -0,0 +1,22 @@ +/* + * Asynchronous monitor notification support + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_WAIT_H +#define QEMU_WAIT_H + +#include "monitor.h" + +void qemu_notify_event(const char *class, const char *name, const char *details); +void do_wait(Monitor *mon); + +#endif

On Wed, Apr 08, 2009 at 09:16:43AM -0500, Anthony Liguori wrote:
The wait command will pause the monitor the command was issued in until a new event becomes available. Events are queued if there isn't a waiter present. The wait command completes after a single event is available.
Today, we queue events indefinitely but in the future, I suspect we'll drop events that are older than a certain amount of time to avoid infinitely allocating memory for long running VMs.
To make use of the new notification mechanism, this patch introduces a qemu_notify_event() API. This API takes three parameters: a class which is meant to classify the type of event being generated, a name which is meant to distinguish which event in the class that has been generated, and a details parameters which is meant to allow events to send arbitrary data with a given event.
Perhaps we should have the ability to turn on/off events, via a 'notify EVENT' command, and a way turn off the prompt on the channel used for receiving events. So if I was interested in RTC change, and VNC client connection events, on the main monitor command channel we'd do: (qemu) notify rtc-change rtc-change notification enabled (qemu) notify vnc-client vnc-client notification enabled (qemu) And then in the 2nd monitor channel, a single 'wait' command would turn off the monitor prompt and make the channel dedicated for just events, one per line (qemu) wait rtc-change UTC+0100 vnc-client connect 192.46.12.4:9353 vnc-client disconnect 192.46.12.4:9353 vnc-client connect 192.46.12.2:9353 vnc-client disconnect 192.46.12.2:9353 Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

Daniel P. Berrange wrote:
On Wed, Apr 08, 2009 at 09:16:43AM -0500, Anthony Liguori wrote:
The wait command will pause the monitor the command was issued in until a new event becomes available. Events are queued if there isn't a waiter present. The wait command completes after a single event is available.
Today, we queue events indefinitely but in the future, I suspect we'll drop events that are older than a certain amount of time to avoid infinitely allocating memory for long running VMs.
To make use of the new notification mechanism, this patch introduces a qemu_notify_event() API. This API takes three parameters: a class which is meant to classify the type of event being generated, a name which is meant to distinguish which event in the class that has been generated, and a details parameters which is meant to allow events to send arbitrary data with a given event.
Perhaps we should have the ability to turn on/off events, via a 'notify EVENT' command, and a way to turn off the prompt on the channel used for receiving events.
So if I was interested in RTC change, and VNC client connection events, on the main monitor command channel we'd do:
(qemu) notify rtc-change rtc-change notification enabled (qemu) notify vnc-client vnc-client notification enabled (qemu)
So you want to mask out event types? I think you could do this with the actual wait command either inclusively: (qemu) wait "rtc-change vnc-client" ... Or exclusively: (qemu) wait -x "rtc-change vnc-client" ...
And then in the 2nd monitor channel, a single 'wait' command would turn off the monitor prompt and make the channel dedicated for just events, one per line
(qemu) wait rtc-change UTC+0100 vnc-client connect 192.46.12.4:9353 vnc-client disconnect 192.46.12.4:9353 vnc-client connect 192.46.12.2:9353 vnc-client disconnect 192.46.12.2:9353
N.B. Right now, wait returns only a single event. This is because the output format is: (qemu) wait 1239200822.748241: vm-state: stop (qemu) But vm-state doesn't have any details, if it had details it would be: (qemu) wait 1239200822.748241: vm-state: stop The virtual machine has stopped. (qemu) Since everyone already parses commands like this, I think the format makes sense. It implies that the event dispatch code has to sit constantly issuing wait commands. In my next version of the patch, I expire old events (older than 10 minutes), and also add a -d flag to poll for events vs. wait. Regards, Anthony Liguori
Daniel
-- Regards, Anthony Liguori

On 04/08/09 16:33, Daniel P. Berrange wrote:
On Wed, Apr 08, 2009 at 09:16:43AM -0500, Anthony Liguori wrote:
The wait command will pause the monitor the command was issued in until a new event becomes available. Events are queued if there isn't a waiter present. The wait command completes after a single event is available.
Today, we queue events indefinitely but in the future, I suspect we'll drop events that are older than a certain amount of time to avoid infinitely allocating memory for long running VMs.
To make use of the new notification mechanism, this patch introduces a qemu_notify_event() API. This API takes three parameters: a class which is meant to classify the type of event being generated, a name which is meant to distinguish which event in the class that has been generated, and a details parameters which is meant to allow events to send arbitrary data with a given event.
Perhaps we should have the ability to turn on/off events, via a 'notify EVENT' command, and a way to turn off the prompt on the channel used for receiving events.
That would nicely solve the "queue events indefinitely" issue. By default no events are generated. Apps which want events (and thus receive them) can enable them as needed.
And then in the 2nd monitor channel, a single 'wait' command would turn off the monitor prompt and make the channel dedicated for just events, one per line
(qemu) wait rtc-change UTC+0100 vnc-client connect 192.46.12.4:9353 vnc-client disconnect 192.46.12.4:9353 vnc-client connect 192.46.12.2:9353 vnc-client disconnect 192.46.12.2:9353
IMHO this is more useful than having "wait" just get one event. You'll need a dedicated monitor channel for events anyway, so with one-event-per-wait the management app would have to issue wait in a loop. BTW: "wait" is quite generic. Maybe we should name the commands notify-*, i.e. have notify-enable $class notify-disable $class notify-getevents cheers, Gerd

Gerd Hoffmann wrote:
On 04/08/09 16:33, Daniel P. Berrange wrote:
On Wed, Apr 08, 2009 at 09:16:43AM -0500, Anthony Liguori wrote:
The wait command will pause the monitor the command was issued in until a new event becomes available. Events are queued if there isn't a waiter present. The wait command completes after a single event is available.
Today, we queue events indefinitely but in the future, I suspect we'll drop events that are older than a certain amount of time to avoid infinitely allocating memory for long running VMs.
To make use of the new notification mechanism, this patch introduces a qemu_notify_event() API. This API takes three parameters: a class which is meant to classify the type of event being generated, a name which is meant to distinguish which event in the class that has been generated, and a details parameters which is meant to allow events to send arbitrary data with a given event.
Perhaps we should have the ability to turn on/off events, via a 'notify EVENT' command, and a way to turn off the prompt on the channel used for receiving events.
That would nicely solve the "queue events indefinitely" issue. By default no events are generated. Apps which want events (and thus receive them) can enable them as needed.
Sounds reasonable to me as well.
And then in the 2nd monitor channel, a single 'wait' command would turn off the monitor prompt and make the channel dedicated for just events, one per line
(qemu) wait rtc-change UTC+0100 vnc-client connect 192.46.12.4:9353 vnc-client disconnect 192.46.12.4:9353 vnc-client connect 192.46.12.2:9353 vnc-client disconnect 192.46.12.2:9353
IMHO this is more useful than having "wait" just get one event. You'll need a dedicated monitor channel for events anyway, so with one-event-per-wait the management app would have to issue wait in a loop.
But doesn't it have to _loop_ anyway? If wait returned multiple events, the management app would have to loop over the results and then anyway over the actual wait to get the next chunk - thus twice. To me, one event per wait invocation looks simpler to handle.
BTW: "wait" is quite generic. Maybe we should name the commands notify-*, i.e. have
notify-enable $class notify-disable $class notify-getevents
My 2 cents: event_enable $class1[,...] event_disable $class1[,...] with a special class 'all' and event_wait to finally collect the queued and enabled events. There is just the question of what to do with queued events of a certain class that gets disabled before the events were dequeued. Purge them selectively or let the user do this via event_wait? I'm not a fan of cleanup via magic timeouts / event aging. Jan -- Siemens AG, Corporate Technology, CT SE 2 Corporate Competence Center Embedded Linux

Gerd Hoffmann wrote:
On 04/08/09 16:33, Daniel P. Berrange wrote:
On Wed, Apr 08, 2009 at 09:16:43AM -0500, Anthony Liguori wrote:
The wait command will pause the monitor the command was issued in until a new event becomes available. Events are queued if there isn't a waiter present. The wait command completes after a single event is available.
Today, we queue events indefinitely but in the future, I suspect we'll drop events that are older than a certain amount of time to avoid infinitely allocating memory for long running VMs.
To make use of the new notification mechanism, this patch introduces a qemu_notify_event() API. This API takes three parameters: a class which is meant to classify the type of event being generated, a name which is meant to distinguish which event in the class that has been generated, and a details parameters which is meant to allow events to send arbitrary data with a given event.
Perhaps we should have the ability to turn on/off events, via a 'notify EVENT' command, and a way to turn off the prompt on the channel used for receiving events.
That would nicely solve the "queue events indefinitely" issue. By default no events are generated. Apps which want events (and thus receive them) can enable them as needed.
It doesn't. When an app enables events, we would start queuing them, but if it didn't consume them in a timely manner (or at all), we would start leaking memory badly. We want to be robust even in the face of poorly written management apps/scripts so we need some expiration function too.
And then in the 2nd monitor channel, a single 'wait' command would turn off the monitor prompt and make the channel dedicated for just events, one per line
(qemu) wait rtc-change UTC+0100 vnc-client connect 192.46.12.4:9353 vnc-client disconnect 192.46.12.4:9353 vnc-client connect 192.46.12.2:9353 vnc-client disconnect 192.46.12.2:9353
IMHO this is more useful than having "wait" just get one event. You'll need a dedicated monitor channel for events anyway, so with one-event-per-wait the management app would have to issue wait in a loop.
There are two issues with this syntax. The first is that 'notify EVENT' logically acts on the global event set. That's not necessarily what you want. For instance, libvirt may attach to a monitor and issue a 'wait "vm-state vnc-events"' and I may have some wiz-bang app that wants to connect on another monitor, and issue a 'wait "watchdog-events"'. My super-deluxe app may sit watching for watchdog events to do some sort of fancy RAS stuff or something like that. The 'notify EVENT' model makes this difficult unless you have notify act only on the current monitor session. Monitor "sessions" are ill-defined though b/c of things like tcp:// reconnects so I wouldn't want to do that. The second issue is that there is no clear way to delimit events other than a new line. If we wanted to send data along with events, we really want to be able to span multiple lines. Especially if we want that data to be in the same format as some of the existing monitor commands. You could get around this by introducing a new delimiter like '.' but everyone can already handle '(qemu)'. That said, I can see a few advantages in the above model. Obviously, the ability to read a large chunk of output and then parse multiple events in a single round trip is a big positive. You could get that with my syntax by sending multiple wait commands at once but that's a bit hackish. Also, I think where the above really shines is if you're a human user and you want to see all the events while debugging. You really don't want to keep typing wait in the monitor. So as a compromise, I think we need to introduce a mode where we can do the above but I'd like to wait until after the first round of these go in. I'm thinking along the lines of 'wait N' where N can be -1 to signify an unlimited number of events or something like that.
BTW: "wait" is quite generic. Maybe we should name the commands notify-*, i.e. have
Good point, I like wait_event personally. Regards, Anthony Liguori
notify-enable $class notify-disable $class notify-getevents
cheers, Gerd
-- Regards, Anthony Liguori

Anthony Liguori wrote:
It doesn't. When an app enables events, we would start queuing them, but if it didn't consume them in a timely manner (or at all), we would start leaking memory badly.
We want to be robust even in the face of poorly written management apps/scripts so we need some expiration function too.
What happens when an app stops reading the monitor channel for a little while, and there's enough monitor output to fill TCP buffers or terminal buffers? Does it block QEMU? Does QEMU drop arbitrary bytes from the stream, corrupting the output syntax? If you send events only to the monitor which requests them, then you could say that they are sent immediately to that monitor, and if the app stops reading the monitor, whatever normally happens when it stops reading happens to these events. In other words, no need for arbitrary expiration time. Makes it deterministic at least.
And then in the 2nd monitor channel, a single 'wait' command would turn off the monitor prompt and make the channel dedicated for just events, one per line
(qemu) wait rtc-change UTC+0100 vnc-client connect 192.46.12.4:9353 vnc-client disconnect 192.46.12.4:9353 vnc-client connect 192.46.12.2:9353 vnc-client disconnect 192.46.12.2:9353
IMHO this is more useful than having "wait" just get one event. You'll need a dedicated monitor channel for events anyway, so with one-event-per-wait the management app would have to issue wait in a loop.
There are two issues with this syntax. The first is that 'notify EVENT' logically acts on the global event set. That's not necessarily what you want. For instance, libvirt may attach to a monitor and issue a 'wait "vm-state vnc-events"' and I may have some wiz-bang app that wants to connect on another monitor, and issue a 'wait "watchdog-events"'. My super-deluxe app may sit watching for watchdog events to do some sort of fancy RAS stuff or something like that.
I like this idea a lot. Specifically I like the idea that separate monitoring apps can operate independently, even watching the same events if they need to. A natural way to support that is per-monitor (connection?) event sets. To reliably track state, monitoring apps which aren't in control of the VM themselves (just monitoring) will need to do this: 1. Request events. 2. _Then_ check the current state of things they care about. (E.g. is the VM running) 3. _Then_ listen for new events since step 1. Otherwise you get races similar to those signal/select races. That argues for (qemu) notify event-type-list ok (qemu) query blah blah... results (qemu) wait Rather than (qemu) wait event-type-list As the latter form cannot accomodate a race-free monitoring pattern unless you have a second connection which does the state query after the first "wait" has been issued. It would be silly to force monitoring apps to open two monitor connections just to view some state of QEMU, when one is enough. Also, the latter form (wait event-type-list) _must_ output something like "ok, events follow" after it has registered for the events, otherwise a monitoring app does not know when it's safe to query state on a second connection to avoid races.
The 'notify EVENT' model makes this difficult unless you have notify act only on the current monitor session.
That would be nice!
Monitor "sessions" are ill-defined though b/c of things like tcp:// reconnects so I wouldn't want to do that.
Oh dear. Is defining it insurmountable? Why can't each TCP (re)connection be a new monitor?
BTW: "wait" is quite generic. Maybe we should name the commands notify-*, i.e. have
Good point, I like wait_event personally.
Me too. And request_event, rather than notify. And a way to remove items from the event set. -- Jamie

Jamie Lokier wrote:
Anthony Liguori wrote:
It doesn't. When an app enables events, we would start queuing them, but if it didn't consume them in a timely manner (or at all), we would start leaking memory badly.
We want to be robust even in the face of poorly written management apps/scripts so we need some expiration function too.
What happens when an app stops reading the monitor channel for a little while, and there's enough monitor output to fill TCP buffers or terminal buffers? Does it block QEMU? Does QEMU drop arbitrary bytes from the stream, corrupting the output syntax?
Depends on the type of character device. They all have different properties in this regard. Basically, you're stuck in a losing proposition. Either you drop output, buffer memory indefinitely, or put the application to sleep. Different character devices make different trade offs.
If you send events only to the monitor which requests them, then you could say that they are sent immediately to that monitor, and if the app stops reading the monitor, whatever normally happens when it stops reading happens to these events.
In other words, no need for arbitrary expiration time. Makes it determinstic at least.
You're basically saying that if something isn't connected, drop them. If it is connected, do a monitor_printf() such that you're never queuing events. Entirely reasonable and I've considered it. However, I do like the idea though of QEMU queuing events for a certain period of time. Not everyone always has something connected to a monitor. I may notice that my NFS server (which runs in a VM) is not responding, VNC to the system, switch to the monitor, and take a look at the event log. If I can get the past 10 minutes of events, I may see something useful like a host IO failure.
Monitor "sessions" are ill-defined though b/c of things like tcp:// reconnects so I wouldn't want to do that.
Oh dear. Is defining it insurmountable?
Why can't each TCP (re)connection be a new monitor?
You get a notification on reconnect but not on disconnect. Basically CharDriverState is not designed around a connect model. The fact that it has any notion of reconnect today is really a big hack. CharDriverState could definitely use a rewrite. It hasn't aged well at all. Regards, Anthony Liguori

On Wed, 2009-04-08 at 14:35 -0500, Anthony Liguori wrote:
You're basically saying that if something isn't connected, drop them. If it is connected, do a monitor_printf() such that you're never queuing events. Entirely reasonable and I've considered it.
However, I do like the idea though of QEMU queuing events for a certain period of time. Not everyone always has something connected to a monitor. I may notice that my NFS server (which runs in a VM) is not responding, VNC to the system, switch to the monitor, and take a look at the event log. If I can get the past 10 minutes of events, I may see something useful like a host IO failure.
"10 minutes" is the red flag for me. Why not 5 minutes? 60 minutes? 24 hours? The fact that it's so arbitrary suggests it doesn't really belong. If you care, you can attach a logging daemon that keeps the last 10 minutes worth of data... -- Hollis Blanchard IBM Linux Technology Center

Hollis Blanchard wrote:
On Wed, 2009-04-08 at 14:35 -0500, Anthony Liguori wrote:
You're basically saying that if something isn't connected, drop them. If it is connected, do a monitor_printf() such that you're never queuing events. Entirely reasonable and I've considered it.
However, I do like the idea though of QEMU queuing events for a certain period of time. Not everyone always has something connected to a monitor. I may notice that my NFS server (which runs in a VM) is not responding, VNC to the system, switch to the monitor, and take a look at the event log. If I can get the past 10 minutes of events, I may see something useful like a host IO failure.
"10 minutes" is the red flag for me. Why not 5 minutes? 60 minutes? 24 hours? The fact that it's so arbitrary suggests it doesn't really belong. If you care, you can attach a logging daemon that keeps the last 10 minutes worth of data...
It has to be some finite amount. You're right, it's arbitrary, but so is every other memory limitation we have in QEMU. You could make it user configurable but that's just punting the problem. You have to do some level of buffering. It's unavoidable. If you aren't buffering at the event level, you buffer at the socket level, etc. Regards, Anthony Liguori

On Wed, 2009-04-08 at 16:14 -0500, Anthony Liguori wrote:
Hollis Blanchard wrote:
On Wed, 2009-04-08 at 14:35 -0500, Anthony Liguori wrote:
You're basically saying that if something isn't connected, drop them. If it is connected, do a monitor_printf() such that you're never queuing events. Entirely reasonable and I've considered it.
However, I do like the idea though of QEMU queuing events for a certain period of time. Not everyone always has something connected to a monitor. I may notice that my NFS server (which runs in a VM) is not responding, VNC to the system, switch to the monitor, and take a look at the event log. If I can get the past 10 minutes of events, I may see something useful like a host IO failure.
"10 minutes" is the red flag for me. Why not 5 minutes? 60 minutes? 24 hours? The fact that it's so arbitrary suggests it doesn't really belong. If you care, you can attach a logging daemon that keeps the last 10 minutes worth of data...
It has to be some finite amount. You're right, it's arbitrary, but so is every other memory limitation we have in QEMU. You could make it user configurable but that's just punting the problem.
You have to do some level of buffering. It's unavoidable. If you aren't buffering at the event level, you buffer at the socket level, etc.
If the socket will buffer it, why do you *also* want to buffer in qemu (adding code and complexity)? -- Hollis Blanchard IBM Linux Technology Center

Hollis Blanchard wrote:
On Wed, 2009-04-08 at 16:14 -0500, Anthony Liguori wrote:
It has to be some finite amount. You're right, it's arbitrary, but so is every other memory limitation we have in QEMU. You could make it user configurable but that's just punting the problem.
You have to do some level of buffering. It's unavoidable. If you aren't buffering at the event level, you buffer at the socket level, etc.
If the socket will buffer it, why do you *also* want to buffer in qemu (adding code and complexity)?
If you fill the socket buffer, you have two choices. You can sleep, which is unacceptable in QEMU since we're single threaded, or you can drop data. If you drop data in something like the monitor, it will lead to corruption which is unrecoverable for a management tool. You have to push that information higher up the stack so that the thing pushing data can make more intelligent decisions about what to drop. In this case, we're dropping anything older than 10 minutes. We'll probably need an max-number of events too but right now, it's based on time. Regards, Anthony Liguori

Anthony Liguori wrote:
However, I do like the idea though of QEMU queuing events for a certain period of time. Not everyone always has something connected to a monitor. I may notice that my NFS server (which runs in a VM) is not responding, VNC to the system, switch to the monitor, and take a look at the event log. If I can get the past 10 minutes of events, I may see something useful like a host IO failure.
If you want finite and deterministic behavior, the only way to achieve it is by using a finite number of events. Even better, you can pre-allocate the events themselves in a large array avoiding runtime memory operations and just use them as a ring. The fixed number of monitor events could be a command line or config option. And if no monitor has connected since overflow, deliberately corrupt the first event. (qemu) wait monitor warning !!Message buffer has overflowed=Event log truncated!!

On Wed, Apr 08, 2009 at 08:06:11PM +0100, Jamie Lokier wrote:
Anthony Liguori wrote:
It doesn't. When an app enables events, we would start queuing them, but if it didn't consume them in a timely manner (or at all), we would start leaking memory badly.
We want to be robust even in the face of poorly written management apps/scripts so we need some expiration function too.
What happens when an app stops reading the monitor channel for a little while, and there's enough monitor output to fill TCP buffers or terminal buffers? Does it block QEMU? Does QEMU drop arbitrary bytes from the stream, corrupting the output syntax?
One scheme would be to have a small buffer - enough to store say 10 events. If the monitor is blocking for write, and the buffer is full then start to discard all further events. When the buffer has more space again, then send an explicit 'overflow' event informing the app that stuff has been dropped from the event queue. In normal circumstances the app would never see this message, but if there was some unexpected problem causing the app to not process events quickly enough, then at least it would be able to then detect that qemu has discarded a lot of events, and re-synchronize its state by running appropriate 'info' commands. Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

Daniel P. Berrange wrote:
One scheme would be to have a small buffer - enough to store say 10 events. If the monitor is blocking for write, and the buffer is full then start to discard all further events. When the buffer has more space again, then send an explicit 'overflow' event informing the app that stuff has been dropped from the event queue.
In normal circumstances the app would never see this message, but if there was some unexpected problem causing the app to not process events quickly enough, then at least it would be able to then detect that qemu has discarded a lot of events, and re-synchronize its state by running appropriate 'info' commands.
This is pretty much what Linux real-time queued I/O signals do. It's ugly but works. :-) -- Jamie

On 04/08/09 19:44, Anthony Liguori wrote:
We want to be robust even in the face of poorly written management apps/scripts so we need some expiration function too.
Well, if you want protect against broken apps, then yes, you'll have to expire events ...
There are two issues with this syntax. The first is that 'notify EVENT' logically acts on the global event set. That's not necessarily what you want.
OK, having per-monitor events certainly makes sense.
The second issue is that there is no clear way to delimit events other than a new line. If we wanted to send data along with events, we really want to be able to span multiple lines. Especially if we want that data to be in the same format as some of the existing monitor commands. You could get around this by introducing a new delimiter like '.' but everyone can already handle '(qemu)'.
Point.
Also, I think where the above really shines is if you're a human user and you want to see all the events while debugging. You really don't want to keep typing wait in the monitor.
So as a compromise, I think we need to introduce a mode where we can do the above but I'd like to wait until after the first round of these go in. I'm thinking along the lines of 'wait N' where N can be -1 to signify an unlimited number of events or something like that.
Hmm. Why would you want to use -- say -- "wait 3" ? It probably will be either "loop forever" or "single event" mode in practice. We might also have a "single event, but don't block if there isn't any" mode. cheers, Gerd

Gerd Hoffmann wrote:
Hmm. Why would you want to use -- say -- "wait 3" ? It probably will be either "loop forever" or "single event" mode in practice. We might also have a "single event, but don't block if there isn't any" mode.
Yeah, I came to the same conclusion. We just need a wait-forever command. I'll have it in the next set of patches. -- Regards, Anthony Liguori

This patch adds notification whenever a vm starts or stops. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> diff --git a/vl.c b/vl.c index f78cabb..ca0bcf9 100644 --- a/vl.c +++ b/vl.c @@ -164,6 +164,8 @@ int main(int argc, char **argv) #include "libslirp.h" #endif +#include "wait.h" + //#define DEBUG_UNUSED_IOPORT //#define DEBUG_IOPORT //#define DEBUG_NET @@ -3545,6 +3547,11 @@ static void vm_state_notify(int running, int reason) { VMChangeStateEntry *e; + if (running) + qemu_notify_event("vm-state", "start", ""); + else + qemu_notify_event("vm-state", "stop", ""); + for (e = vm_change_state_head.lh_first; e; e = e->entries.le_next) { e->cb(e->opaque, running, reason); }

Anthony Liguori wrote:
Right now only one monitor device can be enabled at a time. In order to support
I guess you are talking about -monitor provided instances here. There can already be multiple instances (multiplexed ones or the one provided via gdb).
asynchronous notification of events, I would like to introduce a 'wait' command that waits for an event to occur. This implies that we need an additional monitor session to allow commands to still be executed while waiting for an asynchronous notification.
Need to have a closer look at the actual patch later, but the description confuses me. 'wait' itself makes sense, though. Jan
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/vl.c b/vl.c index 4bd173f..f78cabb 100644 --- a/vl.c +++ b/vl.c @@ -184,6 +184,9 @@ int main(int argc, char **argv) /* Max number of bluetooth switches on the commandline. */ #define MAX_BT_CMDLINE 10
+/* Maximum number of monitor devices */ +#define MAX_MONITOR_DEVICES 10 + /* XXX: use a two level table to limit memory usage */ #define MAX_IOPORTS 65536
@@ -4252,8 +4255,9 @@ int main(int argc, char **argv, char **envp) int hda_index; int optind; const char *r, *optarg; - CharDriverState *monitor_hd = NULL; - const char *monitor_device; + CharDriverState *monitor_hds[MAX_MONITOR_DEVICES]; + const char *monitor_devices[MAX_MONITOR_DEVICES]; + int monitor_device_index; const char *serial_devices[MAX_SERIAL_PORTS]; int serial_device_index; const char *parallel_devices[MAX_PARALLEL_PORTS]; @@ -4324,7 +4328,6 @@ int main(int argc, char **argv, char **envp) kernel_cmdline = ""; cyls = heads = secs = 0; translation = BIOS_ATA_TRANSLATION_AUTO; - monitor_device = "vc:80Cx24C";
serial_devices[0] = "vc:80Cx24C"; for(i = 1; i < MAX_SERIAL_PORTS; i++) @@ -4340,6 +4343,11 @@ int main(int argc, char **argv, char **envp) virtio_consoles[i] = NULL; virtio_console_index = 0;
+ monitor_devices[0] = "vc:80Cx24C"; + for (i = 1; i < MAX_MONITOR_DEVICES; i++) + monitor_devices[i] = NULL; + monitor_device_index = 0; + usb_devices_index = 0;
nb_net_clients = 0; @@ -4723,7 +4731,12 @@ int main(int argc, char **argv, char **envp) break; } case QEMU_OPTION_monitor: - monitor_device = optarg; + if (monitor_device_index >= MAX_MONITOR_DEVICES) { + fprintf(stderr, "qemu: too many monitor devices\n"); + exit(1); + } + monitor_devices[monitor_device_index] = optarg; + monitor_device_index++; break; case QEMU_OPTION_serial: if (serial_device_index >= MAX_SERIAL_PORTS) { @@ -4974,8 +4987,8 @@ int main(int argc, char **argv, char **envp) serial_devices[0] = "stdio"; if (parallel_device_index == 0) parallel_devices[0] = "null"; - if (strncmp(monitor_device, "vc", 2) == 0) - monitor_device = "stdio"; + if (strncmp(monitor_devices[0], "vc", 2) == 0) + monitor_devices[0] = "stdio"; }
#ifndef _WIN32 @@ -5184,14 +5197,14 @@ int main(int argc, char **argv, char **envp) #endif
/* Maintain compatibility with multiple stdio monitors */ - if (!strcmp(monitor_device,"stdio")) { + if (!strcmp(monitor_devices[0],"stdio")) { for (i = 0; i < MAX_SERIAL_PORTS; i++) { const char *devname = serial_devices[i]; if (devname && !strcmp(devname,"mon:stdio")) { - monitor_device = NULL; + monitor_devices[0] = NULL; break; } else if (devname && !strcmp(devname,"stdio")) { - monitor_device = NULL; + monitor_devices[0] = NULL; serial_devices[i] = "mon:stdio"; break; } @@ -5208,11 +5221,20 @@ int main(int argc, char **argv, char **envp) } }
- if (monitor_device) { - monitor_hd = qemu_chr_open("monitor", monitor_device, NULL); - if (!monitor_hd) { - fprintf(stderr, "qemu: could not open monitor device '%s'\n", monitor_device); - exit(1); + for (i = 0; i < MAX_MONITOR_DEVICES; i++) { + const char *devname = monitor_devices[i]; + if (devname && strcmp(devname, "none")) { + char label[32]; + if (i == 0) + snprintf(label, sizeof(label), "monitor"); + else + snprintf(label, sizeof(label), "monitor%d", i); + monitor_hds[i] = qemu_chr_open(label, devname, NULL); + if (!monitor_hds[i]) { + fprintf(stderr, "qemu: could not open monitor device '%s'\n", + devname); + exit(1); + } } }
@@ -5335,8 +5357,13 @@ int main(int argc, char **argv, char **envp) text_consoles_set_display(display_state); qemu_chr_initial_reset();
- if (monitor_device && monitor_hd) - monitor_init(monitor_hd, MONITOR_USE_READLINE | MONITOR_IS_DEFAULT); + for (i = 0; i < MAX_MONITOR_DEVICES; i++) { + if (monitor_devices[i] && monitor_hds[i]) { + monitor_init(monitor_hds[i], + MONITOR_USE_READLINE | + ((i == 0) ? MONITOR_IS_DEFAULT : 0)); + } + }
for(i = 0; i < MAX_SERIAL_PORTS; i++) { const char *devname = serial_devices[i];
-- Siemens AG, Corporate Technology, CT SE 2 Corporate Competence Center Embedded Linux
participants (8)
-
Anthony Liguori
-
Anthony Liguori
-
Daniel P. Berrange
-
Gerd Hoffmann
-
Hollis Blanchard
-
Jamie Lokier
-
Jan Kiszka
-
Zachary Amsden